diff --git a/autotest.sh b/autotest.sh index c2f9b586c..29bf57bb4 100755 --- a/autotest.sh +++ b/autotest.sh @@ -54,6 +54,7 @@ timing_exit nvmf_setup timing_enter lib +time test/lib/bdev/blockdev.sh time test/lib/event/event.sh time test/lib/nvme/nvme.sh time test/lib/nvmf/nvmf.sh diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h new file mode 100644 index 000000000..c4eac59c7 --- /dev/null +++ b/include/spdk/bdev.h @@ -0,0 +1,431 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Block device abstraction layer + */ + +#ifndef SPDK_BDEV_H_ +#define SPDK_BDEV_H_ + +#include +#include +#include /* for offsetof */ +#include /* for struct iovec */ +#include + +#include "spdk/event.h" +#include "spdk/queue.h" +#include "spdk/scsi_spec.h" + +#define SPDK_BDEV_SMALL_RBUF_MAX_SIZE 8192 +#define SPDK_BDEV_LARGE_RBUF_MAX_SIZE (64 * 1024) + +#define SPDK_BDEV_MAX_NAME_LENGTH 16 +#define SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH 50 + +struct spdk_bdev_io; + +/** \page block_backend_modules Block Device Backend Modules + +To implement a backend block device driver, a number of functions +dictated by struct spdk_bdev_fn_table must be provided. + +The module should register itself using SPDK_BDEV_MODULE_REGISTER or +SPDK_VBDEV_MODULE_REGISTER to define the parameters for the module. + +Use SPDK_BDEV_MODULE_REGISTER for all block backends that are real disks. +Any virtual backends such as RAID, partitioning, etc. should use +SPDK_VBDEV_MODULE_REGISTER. + +
+ +In the module initialization code, the config file sections can be parsed to +acquire custom configuration parameters. For example, if the config file has +a section such as below: +
+[MyBE]
+  MyParam 1234
+
+ +The value can be extracted as the example below: +
+struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "MyBE");
+int my_param = spdk_conf_section_get_intval(sp, "MyParam");
+
+ +The backend initialization routine also need to create "disks". A virtual +representation of each LUN must be constructed. Mainly a struct spdk_bdev +must be passed to the bdev database via spdk_bdev_register(). + +*/ + +/** + * \brief SPDK block device. + * + * This is a virtual representation of a block device that is exported by the backend. + */ +struct spdk_bdev { + /** User context passed in by the backend */ + void *ctxt; + + /** Unique name for this block device. */ + char name[SPDK_BDEV_MAX_NAME_LENGTH]; + + /** Unique product name for this kind of block device. */ + char product_name[SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH]; + + /** Size in bytes of a logical block for the backend */ + uint64_t blocklen; + + /** Number of blocks */ + uint64_t blockcnt; + + /** write cache enabled, not used at the moment */ + int write_cache; + + /** + * This is used to make sure buffers are sector aligned. + * This causes double buffering on writes. + */ + int need_aligned_buffer; + + /** thin provisioning, not used at the moment */ + int thin_provisioning; + + /** function table for all LUN ops */ + struct spdk_bdev_fn_table *fn_table; + + /** Represents maximum unmap block descriptor count */ + uint32_t max_unmap_bdesc_count; + + /** array of child block dev that is underneath of the current dev */ + struct spdk_bdev **child_bdevs; + + /** number of child blockdevs allocated */ + int num_child_bdevs; + + /** generation value used by block device reset */ + uint32_t gencnt; + + /** Whether the poller is registered with the reactor */ + bool is_running; + + /** Poller to submit IO and check completion */ + struct spdk_poller poller; + + /** True if another blockdev or a LUN is using this device */ + bool claimed; +}; + +/** + * Function table for a block device backend. + * + * The backend block device function table provides a set of APIs to allow + * communication with a backend. The main commands are read/write API + * calls for I/O via submit_request. 
+ */ +struct spdk_bdev_fn_table { + /** Destroy the backend block device object */ + int (*destruct)(struct spdk_bdev *bdev); + + /** Poll the backend for I/O waiting to be completed. */ + int (*check_io)(struct spdk_bdev *bdev); + + /** Process the IO. */ + void (*submit_request)(struct spdk_bdev_io *); + + /** Release buf for read command. */ + void (*free_request)(struct spdk_bdev_io *); +}; + +/** Blockdev I/O type */ +enum spdk_bdev_io_type { + SPDK_BDEV_IO_TYPE_INVALID, + SPDK_BDEV_IO_TYPE_READ, + SPDK_BDEV_IO_TYPE_WRITE, + SPDK_BDEV_IO_TYPE_UNMAP, + SPDK_BDEV_IO_TYPE_FLUSH, + SPDK_BDEV_IO_TYPE_RESET, +}; + +/** Blockdev I/O completion status */ +enum spdk_bdev_io_status { + SPDK_BDEV_IO_STATUS_FAILED = -1, + SPDK_BDEV_IO_STATUS_PENDING = 0, + SPDK_BDEV_IO_STATUS_SUCCESS = 1, +}; + +/** Blockdev reset operation type */ +enum spdk_bdev_reset_type { + /** + * A hard reset indicates that the blockdev layer should not + * invoke the completion callback for I/Os issued before the + * reset is issued but completed after the reset is complete. + */ + SPDK_BDEV_RESET_HARD, + + /** + * A soft reset indicates that the blockdev layer should still + * invoke the completion callback for I/Os issued before the + * reset is issued but completed after the reset is complete. + */ + SPDK_BDEV_RESET_SOFT, +}; + +typedef spdk_event_fn spdk_bdev_io_completion_cb; +typedef void (*spdk_bdev_io_get_rbuf_cb)(struct spdk_bdev_io *bdev_io); + +/** + * Block device I/O + * + * This is an I/O that is passed to an spdk_bdev. + */ +struct spdk_bdev_io { + /** Pointer to scratch area reserved for use by the driver consuming this spdk_bdev_io. */ + void *ctx; + + /** Generation value for each I/O. */ + uint32_t gencnt; + + /** The block device that this I/O belongs to. */ + struct spdk_bdev *bdev; + + /** Enumerated value representing the I/O type. */ + enum spdk_bdev_io_type type; + + union { + struct { + + /** The unaligned rbuf originally allocated. 
*/ + void *buf_unaligned; + + /** For single buffer cases, pointer to the aligned data buffer. */ + void *buf; + + /** For single buffer cases, size of the data buffer. */ + uint64_t nbytes; + + /** Starting offset (in bytes) of the blockdev for this I/O. */ + uint64_t offset; + + /** Indicate whether the blockdev layer to put rbuf or not. */ + bool put_rbuf; + } read; + struct { + /** For basic write case, use our own iovec element */ + struct iovec iov; + + /** For SG buffer cases, array of iovecs to transfer. */ + struct iovec *iovs; + + /** For SG buffer cases, number of iovecs in iovec array. */ + int iovcnt; + + /** For SG buffer cases, total size of data to be transferred. */ + size_t len; + + /** Starting offset (in bytes) of the blockdev for this I/O. */ + uint64_t offset; + } write; + struct { + /** Represents the unmap block descriptors. */ + struct spdk_scsi_unmap_bdesc *unmap_bdesc; + + /** Count of unmap block descriptors. */ + uint16_t bdesc_count; + } unmap; + struct { + /** Represents starting offset in bytes of the range to be flushed. */ + uint64_t offset; + + /** Represents the number of bytes to be flushed, starting at offset. */ + uint64_t length; + } flush; + struct { + int32_t type; + } reset; + } u; + + /** User function that will be called when this completes */ + spdk_bdev_io_completion_cb cb; + + /** Context that will be passed to the completion callback */ + void *caller_ctx; + + struct spdk_event *cb_event; + + /** Callback for when rbuf is allocated */ + spdk_bdev_io_get_rbuf_cb get_rbuf_cb; + + /** Status for the IO */ + enum spdk_bdev_io_status status; + + /** Used in virtual device (e.g., RAID), indicates its parent spdk_bdev_io **/ + void *parent; + + /** Used in virtual device (e.g., RAID) for storing multiple child device I/Os **/ + TAILQ_HEAD(child_io, spdk_bdev_io) child_io; + + /** Member used for linking child I/Os together. 
*/ + TAILQ_ENTRY(spdk_bdev_io) link; + + /** Number of children for this I/O */ + int children; + + /** Entry to the list need_buf of struct spdk_bdev. */ + TAILQ_ENTRY(spdk_bdev_io) rbuf_link; + + /** Per I/O context for use by the blockdev module */ + uint8_t driver_ctx[0]; + + /* No members may be added after driver_ctx! */ +}; + +/** Block device module */ +struct spdk_bdev_module_if { + /** + * Initialization function for the module. Called by the spdk + * application during startup. + * + * Modules are required to define this function. + */ + int (*module_init)(void); + + /** + * Finish function for the module. Called by the spdk application + * before the spdk application exits to perform any necessary cleanup. + * + * Modules are not required to define this function. + */ + void (*module_fini)(void); + + /** + * Function called to return a text string representing the + * module's configuration options for inclusion in a configuration file. + */ + void (*config_text)(FILE *fp); + + /** Name for the modules being defined. */ + const char *module_name; + + /** + * Returns the allocation size required for the backend for uses such as local + * command structs, local SGL, iovecs, or other user context. + */ + int (*get_ctx_size)(void); + + TAILQ_ENTRY(spdk_bdev_module_if) tailq; +}; + +/* The blockdev API has two distinct parts. The first portion of the API + * is to be used by the layer above the blockdev in order to communicate + * with it. The second portion of the API is to be used by the blockdev + * modules themselves to perform operations like completing I/O. + */ + +/* The following functions are intended to be called from the upper layer + * that is using the blockdev layer. 
+ */ +struct spdk_bdev_io *spdk_bdev_read(struct spdk_bdev *bdev, + void *buf, uint64_t nbytes, uint64_t offset, + spdk_bdev_io_completion_cb cb, void *cb_arg); +struct spdk_bdev_io *spdk_bdev_write(struct spdk_bdev *bdev, + void *buf, uint64_t nbytes, uint64_t offset, + spdk_bdev_io_completion_cb cb, void *cb_arg); +struct spdk_bdev_io *spdk_bdev_writev(struct spdk_bdev *bdev, + struct iovec *iov, int iovcnt, + uint64_t len, uint64_t offset, + spdk_bdev_io_completion_cb cb, void *cb_arg); +struct spdk_bdev_io *spdk_bdev_unmap(struct spdk_bdev *bdev, + struct spdk_scsi_unmap_bdesc *unmap_d, + uint16_t bdesc_count, + spdk_bdev_io_completion_cb cb, void *cb_arg); +struct spdk_bdev_io *spdk_bdev_flush(struct spdk_bdev *bdev, + uint64_t offset, uint64_t length, + spdk_bdev_io_completion_cb cb, void *cb_arg); +int spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io); +void spdk_bdev_do_work(void *ctx); +int spdk_bdev_reset(struct spdk_bdev *bdev, int reset_type, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +/* The remaining functions are intended to be called from within + * blockdev modules. 
+ */ +void spdk_bdev_register(struct spdk_bdev *bdev); +void spdk_bdev_unregister(struct spdk_bdev *bdev); +int spdk_bdev_free_io(struct spdk_bdev_io *bdev_io); +void spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_rbuf_cb cb); +struct spdk_bdev_io *spdk_bdev_get_io(void); +struct spdk_bdev_io *spdk_bdev_get_child_io(struct spdk_bdev_io *parent, + struct spdk_bdev *bdev, + spdk_bdev_io_completion_cb cb, + void *cb_arg); +void spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, + enum spdk_bdev_io_status status); +void spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module); +void spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module); + +static inline struct spdk_bdev_io * +spdk_bdev_io_from_ctx(void *ctx) +{ + return (struct spdk_bdev_io *) + ((uintptr_t)ctx - offsetof(struct spdk_bdev_io, driver_ctx)); +} + +#define SPDK_BDEV_MODULE_REGISTER(init_fn, fini_fn, config_fn, ctx_size_fn) \ + static struct spdk_bdev_module_if init_fn ## _if = { \ + .module_init = init_fn, \ + .module_fini = fini_fn, \ + .config_text = config_fn, \ + .get_ctx_size = ctx_size_fn, \ + }; \ + __attribute__((constructor)) static void init_fn ## _init(void) \ + { \ + spdk_bdev_module_list_add(&init_fn ## _if); \ + } + +#define SPDK_VBDEV_MODULE_REGISTER(init_fn, fini_fn, config_fn, ctx_size_fn) \ + static struct spdk_bdev_module_if init_fn ## _if = { \ + .module_init = init_fn, \ + .module_fini = fini_fn, \ + .config_text = config_fn, \ + .get_ctx_size = ctx_size_fn, \ + }; \ + __attribute__((constructor)) static void init_fn ## _init(void) \ + { \ + spdk_vbdev_module_list_add(&init_fn ## _if); \ + } + +#endif /* SPDK_BDEV_H_ */ diff --git a/include/spdk/bdev_db.h b/include/spdk/bdev_db.h new file mode 100644 index 000000000..ffa9607dd --- /dev/null +++ b/include/spdk/bdev_db.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Block device database + */ + +#ifndef SPDK_BDEV_DB_H_ +#define SPDK_BDEV_DB_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_bdev; + +int spdk_bdev_db_add(struct spdk_bdev *bdev); +int spdk_bdev_db_delete(struct spdk_bdev *bdev); + +struct spdk_bdev *spdk_bdev_db_get_by_name(const char *bdev_name); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/spdk/scsi_spec.h b/include/spdk/scsi_spec.h index 87fc0ac97..9c15c26ab 100644 --- a/include/spdk/scsi_spec.h +++ b/include/spdk/scsi_spec.h @@ -40,6 +40,11 @@ #define SPDK_SCSI_SPEC_H #include +#ifdef __linux__ +#include +#elif defined(__FreeBSD__) +#include +#endif #include "spdk/assert.h" diff --git a/lib/Makefile b/lib/Makefile index a16399749..1cf5fc821 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y += conf copy cunit event json jsonrpc log memory rpc trace util nvme nvmf ioat +DIRS-y += bdev conf copy cunit event json jsonrpc log memory rpc trace util nvme nvmf ioat .PHONY: all clean $(DIRS-y) diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile new file mode 100644 index 000000000..a34a566cd --- /dev/null +++ b/lib/bdev/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(DPDK_INC) +C_SRCS = bdev.c bdev_db.c +LIBNAME = bdev + +DIRS-y += malloc nvme + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c new file mode 100644 index 000000000..dfea0f6d5 --- /dev/null +++ b/lib/bdev/bdev.c @@ -0,0 +1,807 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/bdev.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "spdk/bdev_db.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/queue.h" + +#define SPDK_BDEV_IO_POOL_SIZE (64 * 1024) +#define RBUF_SMALL_POOL_SIZE 8192 +#define RBUF_LARGE_POOL_SIZE 1024 + +static struct rte_mempool *spdk_bdev_g_io_pool = NULL; +static struct rte_mempool *g_rbuf_small_pool = NULL; +static struct rte_mempool *g_rbuf_large_pool = NULL; + +typedef TAILQ_HEAD(, spdk_bdev_io) need_rbuf_tailq_t; +static need_rbuf_tailq_t g_need_rbuf_small[RTE_MAX_LCORE]; +static need_rbuf_tailq_t g_need_rbuf_large[RTE_MAX_LCORE]; + +static TAILQ_HEAD(, spdk_bdev_module_if) spdk_bdev_module_list = + TAILQ_HEAD_INITIALIZER(spdk_bdev_module_list); +static TAILQ_HEAD(, spdk_bdev_module_if) spdk_vbdev_module_list = + TAILQ_HEAD_INITIALIZER(spdk_vbdev_module_list); + +static void +spdk_bdev_io_set_rbuf(struct spdk_bdev_io *bdev_io, void *buf) +{ + RTE_VERIFY(bdev_io->get_rbuf_cb != NULL); + RTE_VERIFY(buf != NULL); + bdev_io->u.read.buf_unaligned = buf; + bdev_io->u.read.buf = (void *)((unsigned long)((char *)buf + 512) & ~511UL); + bdev_io->u.read.put_rbuf = true; + bdev_io->get_rbuf_cb(bdev_io); +} + +static void +spdk_bdev_io_put_rbuf(struct spdk_bdev_io *bdev_io) +{ + struct rte_mempool *pool; + void *buf; + need_rbuf_tailq_t *tailq; + uint64_t length; + + length = bdev_io->u.read.nbytes; + buf = bdev_io->u.read.buf_unaligned; + + if (length <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) { + pool = g_rbuf_small_pool; + tailq = &g_need_rbuf_small[rte_lcore_id()]; + } else { + pool = g_rbuf_large_pool; + tailq = &g_need_rbuf_large[rte_lcore_id()]; + } + + if (TAILQ_EMPTY(tailq)) { + rte_mempool_put(pool, buf); + } else { + bdev_io = TAILQ_FIRST(tailq); + TAILQ_REMOVE(tailq, bdev_io, rbuf_link); + spdk_bdev_io_set_rbuf(bdev_io, buf); + } +} + +static int spdk_initialize_rbuf_pool(void) +{ + int cache_size; + + /** + * Ensure no more 
than half of the total buffers end up local caches, by + * using spdk_event_get_active_core_count() to determine how many local caches we need + * to account for. + */ + cache_size = RBUF_SMALL_POOL_SIZE / (2 * spdk_app_get_core_count()); + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) + cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE; + g_rbuf_small_pool = rte_mempool_create("rbuf_small_pool", + RBUF_SMALL_POOL_SIZE, + SPDK_BDEV_SMALL_RBUF_MAX_SIZE + 512, + cache_size, 0, NULL, NULL, NULL, NULL, + SOCKET_ID_ANY, 0); + if (!g_rbuf_small_pool) { + SPDK_ERRLOG("create rbuf small pool failed\n"); + return -1; + } + + cache_size = RBUF_LARGE_POOL_SIZE / (2 * spdk_app_get_core_count()); + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) + cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE; + g_rbuf_large_pool = rte_mempool_create("rbuf_large_pool", + RBUF_LARGE_POOL_SIZE, + SPDK_BDEV_LARGE_RBUF_MAX_SIZE + 512, + cache_size, 0, NULL, NULL, NULL, NULL, + SOCKET_ID_ANY, 0); + if (!g_rbuf_large_pool) { + SPDK_ERRLOG("create rbuf large pool failed\n"); + return -1; + } + + return 0; +} + +static int +spdk_bdev_module_get_max_ctx_size(void) +{ + struct spdk_bdev_module_if *bdev_module; + int max_bdev_module_size = 0; + + TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) { + if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { + max_bdev_module_size = bdev_module->get_ctx_size(); + } + } + + TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) { + if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { + max_bdev_module_size = bdev_module->get_ctx_size(); + } + } + + return max_bdev_module_size; +} + +static int +spdk_bdev_module_initialize(void) +{ + struct spdk_bdev_module_if *bdev_module; + int rc = 0; + + TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) { + rc = bdev_module->module_init(); + if (rc) + return rc; + } + TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) { + rc = 
bdev_module->module_init(); + if (rc) + return rc; + } + return rc; +} + +static void +spdk_bdev_module_finish(void) +{ + struct spdk_bdev_module_if *bdev_module; + + TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) { + if (bdev_module->module_fini) { + bdev_module->module_fini(); + } + } + + TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) { + if (bdev_module->module_fini) { + bdev_module->module_fini(); + } + } +} + +static void +spdk_bdev_config_text(FILE *fp) +{ + struct spdk_bdev_module_if *bdev_module; + + TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) { + if (bdev_module->config_text) { + bdev_module->config_text(fp); + } + } + TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) { + if (bdev_module->config_text) { + bdev_module->config_text(fp); + } + } +} + +static int +spdk_bdev_initialize(void) +{ + int i; + + if (spdk_bdev_module_initialize()) { + SPDK_ERRLOG("bdev module initialize failed"); + return -1; + } + + spdk_bdev_g_io_pool = rte_mempool_create("blockdev_io", + SPDK_BDEV_IO_POOL_SIZE, + sizeof(struct spdk_bdev_io) + + spdk_bdev_module_get_max_ctx_size(), + 64, 0, + NULL, NULL, NULL, NULL, + SOCKET_ID_ANY, 0); + + if (spdk_bdev_g_io_pool == NULL) { + SPDK_ERRLOG("could not allocate spdk_bdev_io pool"); + return -1; + } + + for (i = 0; i < RTE_MAX_LCORE; i++) { + TAILQ_INIT(&g_need_rbuf_small[i]); + TAILQ_INIT(&g_need_rbuf_large[i]); + } + + return spdk_initialize_rbuf_pool(); +} + +/* + * Wrapper to provide rte_mempool_avail_count() on older DPDK versions. + * Drop this if the minimum DPDK version is raised to at least 16.07. 
+ */ +#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1) +static unsigned rte_mempool_avail_count(const struct rte_mempool *pool) +{ + return rte_mempool_count(pool); +} +#endif + +static int +spdk_bdev_check_pool(struct rte_mempool *pool, uint32_t count) +{ + if (rte_mempool_avail_count(pool) != count) { + SPDK_ERRLOG("rte_mempool_avail_count(%s) == %d, should be %d\n", + pool->name, rte_mempool_avail_count(pool), count); + return -1; + } else { + return 0; + } +} + +static int +spdk_bdev_finish(void) +{ + int rc = 0; + + spdk_bdev_module_finish(); + + rc += spdk_bdev_check_pool(g_rbuf_small_pool, RBUF_SMALL_POOL_SIZE); + rc += spdk_bdev_check_pool(g_rbuf_large_pool, RBUF_LARGE_POOL_SIZE); + + return (rc != 0); +} + +struct spdk_bdev_io *spdk_bdev_get_io(void) +{ + struct spdk_bdev_io *bdev_io; + int rc; + + rc = rte_mempool_get(spdk_bdev_g_io_pool, (void **)&bdev_io); + if (rc < 0 || !bdev_io) { + SPDK_ERRLOG("Unable to get spdk_bdev_io\n"); + rte_panic("no memory\n"); + } + + memset(bdev_io, 0, sizeof(*bdev_io)); + + return bdev_io; +} + +static void +spdk_bdev_put_io(struct spdk_bdev_io *bdev_io) +{ + if (!bdev_io) { + return; + } + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && bdev_io->u.read.put_rbuf) { + spdk_bdev_io_put_rbuf(bdev_io); + } + + rte_mempool_put(spdk_bdev_g_io_pool, bdev_io); +} + +static void +_spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io) +{ + uint64_t len = bdev_io->u.read.nbytes; + struct rte_mempool *pool; + need_rbuf_tailq_t *tailq; + int rc; + void *buf = NULL; + + if (len <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) { + pool = g_rbuf_small_pool; + tailq = &g_need_rbuf_small[rte_lcore_id()]; + } else { + pool = g_rbuf_large_pool; + tailq = &g_need_rbuf_large[rte_lcore_id()]; + } + + rc = rte_mempool_get(pool, (void **)&buf); + if (rc < 0 || !buf) { + TAILQ_INSERT_TAIL(tailq, bdev_io, rbuf_link); + } else { + spdk_bdev_io_set_rbuf(bdev_io, buf); + } +} + + +static void +spdk_bdev_cleanup_pending_rbuf_io(struct spdk_bdev *bdev) +{ + struct 
spdk_bdev_io *bdev_io, *tmp; + + TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_small[rte_lcore_id()], rbuf_link, tmp) { + if (bdev_io->bdev == bdev) { + TAILQ_REMOVE(&g_need_rbuf_small[rte_lcore_id()], bdev_io, rbuf_link); + bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED; + } + } + + TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_large[rte_lcore_id()], rbuf_link, tmp) { + if (bdev_io->bdev == bdev) { + TAILQ_REMOVE(&g_need_rbuf_large[rte_lcore_id()], bdev_io, rbuf_link); + bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED; + } + } +} + +static void +spdk_bdev_io_free_request(struct spdk_bdev_io *bdev_io) +{ + bdev_io->bdev->fn_table->free_request(bdev_io); + spdk_bdev_put_io(bdev_io); +} + +static void +__submit_request(spdk_event_t event) +{ + struct spdk_bdev *bdev = spdk_event_get_arg1(event); + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + + bdev_io->cb_event = spdk_event_get_next(event); + + if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) { + if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { + spdk_bdev_cleanup_pending_rbuf_io(bdev); + } + bdev->fn_table->submit_request(bdev_io); + } else { + spdk_bdev_io_free_request(bdev_io); + } +} + +void +spdk_bdev_do_work(void *ctx) +{ + struct spdk_bdev *bdev = ctx; + + bdev->fn_table->check_io(bdev->ctxt); +} + +int +spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_event *event, *cb_event = NULL; + uint32_t lcore = bdev->poller.lcore; + + /* start the poller when first IO comes */ + if (!bdev->is_running) { + bdev->is_running = true; + if (lcore == 0) { + lcore = rte_lcore_id(); + } + spdk_poller_register(&bdev->poller, lcore, NULL); + } + + if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) { + cb_event = spdk_event_allocate(rte_lcore_id(), bdev_io->cb, + bdev_io->caller_ctx, bdev_io, NULL); + RTE_VERIFY(cb_event != NULL); + } + + event = spdk_event_allocate(lcore, __submit_request, bdev, bdev_io, cb_event); + + RTE_VERIFY(event != NULL); + 
spdk_event_call(event); + + return 0; +} + +static void +spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, + struct spdk_bdev *bdev, void *cb_arg, + spdk_bdev_io_completion_cb cb) +{ + bdev_io->bdev = bdev; + bdev_io->ctx = bdev->ctxt; + bdev_io->caller_ctx = cb_arg; + bdev_io->cb = cb; + bdev_io->gencnt = bdev->gencnt; + bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING; + bdev_io->children = 0; + TAILQ_INIT(&bdev_io->child_io); +} + +struct spdk_bdev_io * +spdk_bdev_get_child_io(struct spdk_bdev_io *parent, + struct spdk_bdev *bdev, + spdk_bdev_io_completion_cb cb, + void *cb_arg) +{ + struct spdk_bdev_io *child; + + child = spdk_bdev_get_io(); + if (!child) { + SPDK_ERRLOG("Unable to get spdk_bdev_io\n"); + return NULL; + } + + if (cb_arg == NULL) { + cb_arg = child; + } + + spdk_bdev_io_init(child, bdev, cb_arg, cb); + + child->type = parent->type; + memcpy(&child->u, &parent->u, sizeof(child->u)); + if (child->type == SPDK_BDEV_IO_TYPE_READ) { + child->u.read.put_rbuf = false; + } + child->get_rbuf_cb = NULL; + child->parent = parent; + + TAILQ_INSERT_TAIL(&parent->child_io, child, link); + parent->children++; + + return child; +} + +struct spdk_bdev_io * +spdk_bdev_read(struct spdk_bdev *bdev, + void *buf, uint64_t nbytes, uint64_t offset, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev_io *bdev_io; + int rc; + + /* Return failure if nbytes is not a multiple of bdev->blocklen */ + if (nbytes % bdev->blocklen) { + return NULL; + } + + /* Return failure if offset + nbytes is less than offset; indicates there + * has been an overflow and hence the offset has been wrapped around */ + if ((offset + nbytes) < offset) { + return NULL; + } + + /* Return failure if offset + nbytes exceeds the size of the blockdev */ + if ((offset + nbytes) > (bdev->blockcnt * bdev->blocklen)) { + return NULL; + } + + bdev_io = spdk_bdev_get_io(); + if (!bdev_io) { + SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n"); + return NULL; + } + + bdev_io->type = 
SPDK_BDEV_IO_TYPE_READ;
+	bdev_io->u.read.buf = buf;
+	bdev_io->u.read.nbytes = nbytes;
+	bdev_io->u.read.offset = offset;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		return NULL;
+	}
+
+	return bdev_io;
+}
+
+struct spdk_bdev_io *
+spdk_bdev_write(struct spdk_bdev *bdev,
+		void *buf, uint64_t nbytes, uint64_t offset,
+		spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+	int rc;
+
+	/* Return failure if nbytes is not a multiple of bdev->blocklen */
+	if (nbytes % bdev->blocklen) {
+		return NULL;
+	}
+
+	/* Return failure if offset + nbytes is less than offset; indicates there
+	 * has been an overflow and hence the offset has been wrapped around */
+	if ((offset + nbytes) < offset) {
+		return NULL;
+	}
+
+	/* Return failure if offset + nbytes exceeds the size of the blockdev */
+	if ((offset + nbytes) > (bdev->blockcnt * bdev->blocklen)) {
+		return NULL;
+	}
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		/* Fixed: message previously said "duing writev" in the write path */
+		SPDK_ERRLOG("blockdev_io memory allocation failed during write\n");
+		return NULL;
+	}
+
+	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
+	bdev_io->u.write.iov.iov_base = buf;
+	bdev_io->u.write.iov.iov_len = nbytes;
+	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
+	bdev_io->u.write.iovcnt = 1;
+	bdev_io->u.write.len = nbytes;
+	bdev_io->u.write.offset = offset;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		return NULL;
+	}
+
+	return bdev_io;
+}
+
+struct spdk_bdev_io *
+spdk_bdev_writev(struct spdk_bdev *bdev,
+		 struct iovec *iov, int iovcnt,
+		 uint64_t len, uint64_t offset,
+		 spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+	int rc;
+
+	/* Return failure if len is not a multiple of bdev->blocklen */
+	if (len % bdev->blocklen) {
+		return NULL;
+	}
+
+	/* Return failure if offset + len is less than offset; indicates there
+	 * has been an overflow and hence the offset has been wrapped around */
+	if ((offset + len) < offset) {
+		return NULL;
+	}
+
+	/* Return failure if offset + len exceeds the size of the blockdev */
+	if ((offset + len) > (bdev->blockcnt * bdev->blocklen)) {
+		return NULL;
+	}
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
+		return NULL;
+	}
+
+	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
+	bdev_io->u.write.iovs = iov;
+	bdev_io->u.write.iovcnt = iovcnt;
+	bdev_io->u.write.len = len;
+	bdev_io->u.write.offset = offset;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		return NULL;
+	}
+
+	return bdev_io;
+}
+
+struct spdk_bdev_io *
+spdk_bdev_unmap(struct spdk_bdev *bdev,
+		struct spdk_scsi_unmap_bdesc *unmap_d,
+		uint16_t bdesc_count,
+		spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+	int rc;
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
+		return NULL;
+	}
+
+	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
+	bdev_io->u.unmap.unmap_bdesc = unmap_d;
+	bdev_io->u.unmap.bdesc_count = bdesc_count;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		return NULL;
+	}
+
+	return bdev_io;
+}
+
+struct spdk_bdev_io *
+spdk_bdev_flush(struct spdk_bdev *bdev,
+		uint64_t offset, uint64_t length,
+		spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+	int rc;
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
+		return NULL;
+	}
+
+	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
+	bdev_io->u.flush.offset = offset;
+	bdev_io->u.flush.length = length;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+
spdk_bdev_put_io(bdev_io);
+		return NULL;
+	}
+
+	return bdev_io;
+}
+
+int
+spdk_bdev_reset(struct spdk_bdev *bdev, int reset_type,
+		spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+	int rc;
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
+		return -1;
+	}
+
+	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
+	bdev_io->u.reset.type = reset_type;
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		SPDK_ERRLOG("reset failed\n");
+	}
+
+	return rc;
+}
+
+int
+spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
+{
+	int rc;
+
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io is NULL\n");
+		return -1;
+	}
+
+	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
+		SPDK_ERRLOG("bdev_io is in pending state\n");
+		return -1;
+	}
+
+	/* NOTE(review): freeing is done by resubmitting the completed I/O;
+	 * presumably spdk_bdev_io_submit routes a non-PENDING I/O to the
+	 * module's free_request path — confirm against spdk_bdev_io_submit. */
+	rc = spdk_bdev_io_submit(bdev_io);
+	if (rc < 0) {
+		spdk_bdev_put_io(bdev_io);
+		SPDK_ERRLOG("free_request failure\n");
+	}
+
+	return rc;
+}
+
+void
+spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
+{
+	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
+		/* Successful reset */
+		if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
+			/* Increase the blockdev generation if it is a hard reset */
+			if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) {
+				bdev_io->bdev->gencnt++;
+			}
+		}
+	} else {
+		/*
+		 * Check the gencnt, to see if this I/O was issued before the most
+		 * recent reset. If the gencnt is not equal, then just free the I/O
+		 * without calling the callback, since the caller will have already
+		 * freed its context for this I/O.
+ */ + if (bdev_io->bdev->gencnt != bdev_io->gencnt) { + spdk_bdev_put_io(bdev_io); + return; + } + } + + bdev_io->status = status; + + RTE_VERIFY(bdev_io->cb_event != NULL); + spdk_event_call(bdev_io->cb_event); +} + +void +spdk_bdev_register(struct spdk_bdev *bdev) +{ + /* initialize the reset generation value to zero */ + bdev->gencnt = 0; + bdev->is_running = false; + bdev->poller.fn = spdk_bdev_do_work; + bdev->poller.arg = bdev; + + spdk_bdev_db_add(bdev); +} + +void +spdk_bdev_unregister(struct spdk_bdev *bdev) +{ + int rc; + + spdk_bdev_db_delete(bdev); + + rc = bdev->fn_table->destruct(bdev->ctxt); + if (rc < 0) { + SPDK_ERRLOG("destruct failed\n"); + } + + if (bdev->is_running) { + spdk_poller_unregister(&bdev->poller, NULL); + bdev->is_running = false; + } +} + +void +spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_rbuf_cb cb) +{ + RTE_VERIFY(cb != NULL); + + if (bdev_io->u.read.buf == NULL) { + bdev_io->get_rbuf_cb = cb; + _spdk_bdev_io_get_rbuf(bdev_io); + } else { + cb(bdev_io); + } +} + +void spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module) +{ + TAILQ_INSERT_TAIL(&spdk_bdev_module_list, bdev_module, tailq); +} + +void spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module) +{ + TAILQ_INSERT_TAIL(&spdk_vbdev_module_list, vbdev_module, tailq); +} +SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text) +SPDK_SUBSYSTEM_DEPEND(bdev, copy) diff --git a/lib/bdev/bdev_db.c b/lib/bdev/bdev_db.c new file mode 100644 index 000000000..294f44b68 --- /dev/null +++ b/lib/bdev/bdev_db.c @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "spdk/bdev_db.h"
+
+#include 
+#include 
+#include 
+#include "spdk/bdev.h"
+#include "spdk/log.h"
+
+/* Singly-linked registry node; claimed is bumped on each successful
+ * name lookup (see spdk_bdev_db_get_by_name). */
+struct spdk_db_entry {
+	struct spdk_bdev *bdev;
+	int claimed;
+	struct spdk_db_entry *next;
+};
+
+static struct spdk_db_entry *bdev_list_head = NULL;
+
+/* Prepend a registry entry for bdev. Returns 0 on success, -ENOMEM on
+ * allocation failure. No duplicate check is performed. */
+int spdk_bdev_db_add(struct spdk_bdev *bdev)
+{
+	struct spdk_db_entry *new_entry = calloc(1, sizeof(struct spdk_db_entry));
+
+	if (!new_entry) {
+		SPDK_ERRLOG("Failed to allocate DB entry\n");
+		return -ENOMEM;
+	}
+
+	new_entry->bdev = bdev;
+	new_entry->next = bdev_list_head;
+	bdev_list_head = new_entry;
+
+	return 0;
+}
+
+/* Unlink and free the entry for bdev, if present. Always returns 0;
+ * a bdev that is not in the list is not treated as an error. */
+int spdk_bdev_db_delete(struct spdk_bdev *bdev)
+{
+	struct spdk_db_entry *prev = NULL;
+	struct spdk_db_entry *node = bdev_list_head;
+
+	while (node != NULL) {
+		if (node->bdev == bdev) {
+			if (prev != NULL) {
+				prev->next = node->next;
+			} else {
+				bdev_list_head = node->next;
+			}
+			free(node);
+			break;
+		}
+		prev = node;
+		node = node->next;
+	}
+
+	return 0;
+}
+
+/* Linear search by name (bounded by sizeof(bdev->name)); increments the
+ * entry's claimed count on a hit. Returns NULL if no bdev matches. */
+struct spdk_bdev *spdk_bdev_db_get_by_name(const char *bdev_name)
+{
+	struct spdk_db_entry *current = bdev_list_head;
+
+	while (current != NULL) {
+		struct spdk_bdev *bdev = current->bdev;
+
+		if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) {
+			current->claimed++;
+			return bdev;
+		}
+
+		current = current->next;
+	}
+
+	return NULL;
+}
diff --git a/lib/bdev/malloc/Makefile b/lib/bdev/malloc/Makefile
new file mode 100644
index 000000000..566d486f5
--- /dev/null
+++ b/lib/bdev/malloc/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(DPDK_INC) +C_SRCS = blockdev_malloc.c blockdev_malloc_rpc.c +LIBNAME = bdev_malloc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/bdev/malloc/blockdev_malloc.c b/lib/bdev/malloc/blockdev_malloc.c new file mode 100644 index 000000000..b0cb21eb8 --- /dev/null +++ b/lib/bdev/malloc/blockdev_malloc.c @@ -0,0 +1,360 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +#include "blockdev_malloc.h" +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/log.h" +#include "spdk/copy_engine.h" + +struct malloc_disk { + struct spdk_bdev disk; /* this must be the first element */ + void *malloc_buf; + struct malloc_disk *next; +}; + +static void +malloc_done(void *ref, int status) +{ + struct copy_task *cp_task = (struct copy_task *)ref; + enum spdk_bdev_io_status bdev_status; + + if (status != 0) { + bdev_status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + bdev_status = SPDK_BDEV_IO_STATUS_SUCCESS; + } + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(cp_task), bdev_status); +} + +static struct malloc_disk *g_malloc_disk_head = NULL; + +int malloc_disk_count = 0; + +static int blockdev_malloc_initialize(void); +static void blockdev_malloc_finish(void); +static void blockdev_malloc_get_spdk_running_config(FILE *fp); + +static int +blockdev_malloc_get_ctx_size(void) +{ + return spdk_copy_module_get_max_ctx_size(); +} + +SPDK_BDEV_MODULE_REGISTER(blockdev_malloc_initialize, blockdev_malloc_finish, + blockdev_malloc_get_spdk_running_config, blockdev_malloc_get_ctx_size) + +static void +blockdev_malloc_delete_from_list(struct malloc_disk *malloc_disk) +{ + struct malloc_disk *prev = NULL; + struct malloc_disk *node = g_malloc_disk_head; + + if (malloc_disk == NULL) + return; + + while (node != NULL) { + if (node == malloc_disk) { + if (prev != NULL) { + prev->next = malloc_disk->next; + } else { + g_malloc_disk_head = malloc_disk->next; + } + break; + } + prev = node; + node = node->next; + } +} + +static int +blockdev_malloc_destruct(struct spdk_bdev *bdev) +{ + struct malloc_disk *malloc_disk = (struct malloc_disk *)bdev; + blockdev_malloc_delete_from_list(malloc_disk); + rte_free(malloc_disk->malloc_buf); + rte_free(malloc_disk); + return 0; +} + +static int64_t +blockdev_malloc_read(struct malloc_disk *mdisk, struct copy_task *copy_req, + void *buf, uint64_t nbytes, off_t 
offset)
+{
+	SPDK_TRACELOG(SPDK_TRACE_MALLOC, "read %lu bytes from offset %#lx to %p\n",
+		      nbytes, offset, buf);
+
+	return spdk_copy_submit(copy_req, buf, mdisk->malloc_buf + offset,
+				nbytes, malloc_done);
+}
+
+static int64_t
+blockdev_malloc_writev(struct malloc_disk *mdisk, struct copy_task *copy_req,
+		       struct iovec *iov, int iovcnt, size_t len, off_t offset)
+{
+	/* Only single-element iovecs covering the whole request are supported. */
+	if ((iovcnt != 1) || (iov->iov_len != len))
+		return -1;
+
+	SPDK_TRACELOG(SPDK_TRACE_MALLOC, "wrote %lu bytes to offset %#lx from %p\n",
+		      iov->iov_len, offset, iov->iov_base);
+
+	return spdk_copy_submit(copy_req, mdisk->malloc_buf + offset,
+				iov->iov_base, len, malloc_done);
+}
+
+static int
+blockdev_malloc_check_io(struct spdk_bdev *bdev)
+{
+	return spdk_copy_check_io();
+}
+
+static int64_t
+blockdev_malloc_flush(struct malloc_disk *mdisk, struct copy_task *copy_req,
+		      uint64_t offset, uint64_t nbytes)
+{
+	/* Memory-backed disk: nothing to flush; complete immediately. */
+	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(copy_req), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+	return 0;
+}
+
+static int
+blockdev_malloc_reset(struct malloc_disk *mdisk, struct copy_task *copy_req)
+{
+	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(copy_req), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+	return 0;
+}
+
+/* Dispatch one I/O to the malloc backend. Returns 0 on success (or
+ * immediate completion) and negative on failure or unsupported type. */
+static int _blockdev_malloc_submit_request(struct spdk_bdev_io *bdev_io)
+{
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		if (bdev_io->u.read.buf == NULL) {
+			/* No bounce buffer supplied: point the read directly at
+			 * the backing buffer and complete without a copy.
+			 * (Previously completed via a redundant
+			 * driver_ctx -> spdk_bdev_io_from_ctx round-trip that
+			 * resolves back to bdev_io itself.) */
+			bdev_io->u.read.buf = ((struct malloc_disk *)bdev_io->ctx)->malloc_buf +
+					      bdev_io->u.read.offset;
+			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+			return 0;
+		}
+
+		return blockdev_malloc_read((struct malloc_disk *)bdev_io->ctx,
+					    (struct copy_task *)bdev_io->driver_ctx,
+					    bdev_io->u.read.buf,
+					    bdev_io->u.read.nbytes,
+					    bdev_io->u.read.offset);
+
+	case SPDK_BDEV_IO_TYPE_WRITE:
+		return blockdev_malloc_writev((struct malloc_disk *)bdev_io->ctx,
+					      (struct copy_task *)bdev_io->driver_ctx,
+					      bdev_io->u.write.iovs,
+					      bdev_io->u.write.iovcnt,
+					      bdev_io->u.write.len,
+					      bdev_io->u.write.offset);
+
+	case SPDK_BDEV_IO_TYPE_RESET:
+		return blockdev_malloc_reset((struct malloc_disk *)bdev_io->ctx,
+					     (struct copy_task *)bdev_io->driver_ctx);
+
+	case SPDK_BDEV_IO_TYPE_FLUSH:
+		return blockdev_malloc_flush((struct malloc_disk *)bdev_io->ctx,
+					     (struct copy_task *)bdev_io->driver_ctx,
+					     bdev_io->u.flush.offset,
+					     bdev_io->u.flush.length);
+	default:
+		return -1;
+	}
+	/* Removed unreachable trailing "return 0;" - every case returns. */
+}
+
+static void blockdev_malloc_submit_request(struct spdk_bdev_io *bdev_io)
+{
+	if (_blockdev_malloc_submit_request(bdev_io) < 0) {
+		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+	}
+}
+
+static void blockdev_malloc_free_request(struct spdk_bdev_io *bdev_io)
+{
+}
+
+static struct spdk_bdev_fn_table malloc_fn_table = {
+	.destruct	= blockdev_malloc_destruct,
+	.check_io	= blockdev_malloc_check_io,
+	.submit_request	= blockdev_malloc_submit_request,
+	.free_request	= blockdev_malloc_free_request,
+};
+
+struct malloc_disk *create_malloc_disk(uint64_t num_blocks, uint32_t block_size)
+{
+	struct malloc_disk *mdisk;
+
+	if (block_size % 512 != 0) {
+		SPDK_ERRLOG("Block size %u is not a multiple of 512.\n", block_size);
+		return NULL;
+	}
+
+	if (num_blocks == 0) {
+		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
+		return NULL;
+	}
+
+	mdisk = rte_zmalloc(NULL, sizeof(*mdisk), 0);
+	if (!mdisk) {
+		perror("mdisk");
+		return NULL;
+	}
+
+	/*
+	 * Allocate the large backend memory buffer using rte_malloc(),
+	 * so that we guarantee it is allocated from hugepage memory.
+	 *
+	 * TODO: need to pass a hint so we know which socket to allocate
+	 * from on multi-socket systems.
+ */ + mdisk->malloc_buf = rte_zmalloc(NULL, num_blocks * block_size, 2 * 1024 * 1024); + if (!mdisk->malloc_buf) { + SPDK_ERRLOG("rte_zmalloc failed\n"); + rte_free(mdisk); + return NULL; + } + + snprintf(mdisk->disk.name, SPDK_BDEV_MAX_NAME_LENGTH, "Malloc%d", malloc_disk_count); + snprintf(mdisk->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH, "Malloc disk"); + malloc_disk_count++; + + mdisk->disk.write_cache = 1; + mdisk->disk.blocklen = block_size; + mdisk->disk.blockcnt = num_blocks; + + mdisk->disk.ctxt = mdisk; + mdisk->disk.fn_table = &malloc_fn_table; + + spdk_bdev_register(&mdisk->disk); + + mdisk->next = g_malloc_disk_head; + g_malloc_disk_head = mdisk; + + return mdisk; +} + +static void free_malloc_disk(struct malloc_disk *mdisk) +{ + rte_free(mdisk->malloc_buf); + rte_free(mdisk); +} + +static int blockdev_malloc_initialize() +{ + struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc"); + int NumberOfLuns, LunSizeInMB, BlockSize, i; + uint64_t size; + struct malloc_disk *mdisk; + + if (sp != NULL) { + NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns"); + LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); + BlockSize = spdk_conf_section_get_intval(sp, "BlockSize"); + if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) { + SPDK_ERRLOG("Malloc section present, but no devices specified\n"); + return EINVAL; + } + if (BlockSize < 1) { + /* Default is 512 bytes */ + BlockSize = 512; + } + size = (uint64_t)LunSizeInMB * 1024 * 1024; + for (i = 0; i < NumberOfLuns; i++) { + mdisk = create_malloc_disk(size / BlockSize, BlockSize); + if (mdisk == NULL) { + SPDK_ERRLOG("Could not create malloc disk\n"); + return EINVAL; + } + } + } + return 0; +} + +static void blockdev_malloc_finish() +{ + struct malloc_disk *mdisk; + + while (g_malloc_disk_head != NULL) { + mdisk = g_malloc_disk_head; + g_malloc_disk_head = mdisk->next; + free_malloc_disk(mdisk); + } +} + +static void +blockdev_malloc_get_spdk_running_config(FILE *fp) +{ 
+ int num_malloc_luns = 0; + uint64_t malloc_lun_size = 0; + + /* count number of malloc LUNs, get LUN size */ + struct malloc_disk *mdisk = g_malloc_disk_head; + while (mdisk != NULL) { + if (0 == malloc_lun_size) { + /* assume all malloc luns the same size */ + malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt; + malloc_lun_size /= (1024 * 1024); + } + num_malloc_luns++; + mdisk = mdisk->next; + } + + if (num_malloc_luns > 0) { + fprintf(fp, + "\n" + "# Users may change this section to create a different number or size of\n" + "# malloc LUNs.\n" + "# This will generate %d LUNs with a malloc-allocated backend. Each LUN \n" + "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n" + "# Not all LUNs defined here are necessarily used below.\n" + "[Malloc]\n" + " NumberOfLuns %d\n" + " LunSizeInMB %" PRIu64 "\n", + num_malloc_luns, malloc_lun_size, + num_malloc_luns - 1, num_malloc_luns, + malloc_lun_size); + } +} + +SPDK_LOG_REGISTER_TRACE_FLAG("malloc", SPDK_TRACE_MALLOC) diff --git a/lib/bdev/malloc/blockdev_malloc.h b/lib/bdev/malloc/blockdev_malloc.h new file mode 100644 index 000000000..ec207a1b5 --- /dev/null +++ b/lib/bdev/malloc/blockdev_malloc.h @@ -0,0 +1,43 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BLOCKDEV_MALLOC_H +#define SPDK_BLOCKDEV_MALLOC_H + +#include + +struct malloc_disk; + +struct malloc_disk *create_malloc_disk(uint64_t num_blocks, uint32_t block_size); + +#endif /* SPDK_BLOCKDEV_MALLOC_H */ diff --git a/lib/bdev/malloc/blockdev_malloc_rpc.c b/lib/bdev/malloc/blockdev_malloc_rpc.c new file mode 100644 index 000000000..1155744e8 --- /dev/null +++ b/lib/bdev/malloc/blockdev_malloc_rpc.c @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "blockdev_malloc.h"
+#include "spdk/log.h"
+#include "spdk/rpc.h"
+
+/* Decoded parameters for the construct_malloc_lun RPC. */
+struct rpc_construct_malloc {
+	uint32_t num_blocks;
+	uint32_t block_size;
+};
+
+static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
+	{"num_blocks", offsetof(struct rpc_construct_malloc, num_blocks), spdk_json_decode_uint32},
+	{"block_size", offsetof(struct rpc_construct_malloc, block_size), spdk_json_decode_uint32},
+};
+
+/* RPC handler: decode {num_blocks, block_size}, create a malloc-backed
+ * disk, and reply true on success. A NULL id means the request is a
+ * notification: success produces no response, but errors are still sent. */
+static void
+spdk_rpc_construct_malloc_lun(struct spdk_jsonrpc_server_conn *conn,
+			      const struct spdk_json_val *params,
+			      const struct spdk_json_val *id)
+{
+	struct rpc_construct_malloc req = {};
+	struct spdk_json_write_ctx *w;
+
+	if (spdk_json_decode_object(params, rpc_construct_malloc_decoders,
+				    sizeof(rpc_construct_malloc_decoders) / sizeof(*rpc_construct_malloc_decoders),
+				    &req)) {
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n");
+		goto invalid;
+	}
+
+	if (create_malloc_disk(req.num_blocks, req.block_size) == NULL) {
+		goto invalid;
+	}
+
+	if (id == NULL) {
+		return;
+	}
+
+	w = spdk_jsonrpc_begin_result(conn, id);
+	spdk_json_write_bool(w, true);
+	spdk_jsonrpc_end_result(conn, w);
+	return;
+
+invalid:
+	spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+}
+SPDK_RPC_REGISTER("construct_malloc_lun", spdk_rpc_construct_malloc_lun)
diff --git a/lib/bdev/nvme/Makefile b/lib/bdev/nvme/Makefile
new file mode 100644
index 000000000..0bf953351
--- /dev/null
+++ b/lib/bdev/nvme/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(DPDK_INC) +C_SRCS = blockdev_nvme.c +LIBNAME = bdev_nvme + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/bdev/nvme/blockdev_nvme.c b/lib/bdev/nvme/blockdev_nvme.c new file mode 100644 index 000000000..5e8ccb4d0 --- /dev/null +++ b/lib/bdev/nvme/blockdev_nvme.c @@ -0,0 +1,666 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "spdk/conf.h"
+#include "spdk/pci.h"
+#include "spdk/log.h"
+#include "spdk/bdev.h"
+#include "spdk/nvme.h"
+
+#define MAX_NVME_NAME_LENGTH 64
+
+void init_request_mempool(void);
+static void blockdev_nvme_get_spdk_running_config(FILE *fp);
+
+/* Per-controller bookkeeping entry kept on g_nvme_devices. */
+struct nvme_device {
+	/* NOTE(review): original comment described a pinned 4KB IDENTIFY
+	 * buffer, which does not match this field; this is the controller
+	 * handle returned by the NVMe driver — confirm against attach code. */
+	struct spdk_nvme_ctrlr *ctrlr;
+
+	/** linked list pointer for device list */
+	TAILQ_ENTRY(nvme_device) tailq;
+
+	int id;
+};
+
+/* One exported blockdev carved out of an NVMe namespace. disk must stay
+ * the first member so the struct can be cast from struct spdk_bdev. */
+struct nvme_blockdev {
+	struct spdk_bdev disk;
+	struct spdk_nvme_ctrlr *ctrlr;
+	struct spdk_nvme_ns *ns;
+	struct spdk_nvme_qpair *qpair;
+	uint64_t lba_start;
+	uint64_t lba_end;
+	uint64_t blocklen;
+};
+
+#define NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT	1
+struct nvme_blockio {
+	struct spdk_nvme_dsm_range dsm_range[NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT];
+};
+
+enum data_direction {
+	BDEV_DISK_READ = 0,
+	BDEV_DISK_WRITE = 1
+};
+
+struct nvme_bdf_whitelist {
+	uint16_t	domain;
+	uint8_t		bus;
+	uint8_t		dev;
+	uint8_t		func;
+	char		name[MAX_NVME_NAME_LENGTH];
+};
+
+#define NVME_MAX_BLOCKDEVS_PER_CONTROLLER 256
+#define NVME_MAX_CONTROLLERS 16
+#define NVME_MAX_BLOCKDEVS (NVME_MAX_BLOCKDEVS_PER_CONTROLLER * NVME_MAX_CONTROLLERS)
+static struct nvme_blockdev g_blockdev[NVME_MAX_BLOCKDEVS];
+static int blockdev_index_max = 0;
+static int nvme_luns_per_ns = 1;
+static int nvme_controller_index = 0;
+static int LunSizeInMB = 0;
+static int num_controllers = -1;
+static int unbindfromkernel = 0;
+
+/* Fixed: stray second semicolon after the initializer. */
+static TAILQ_HEAD(, nvme_device) g_nvme_devices = TAILQ_HEAD_INITIALIZER(g_nvme_devices);
+
+static void nvme_ctrlr_initialize_blockdevs(struct spdk_nvme_ctrlr *ctrlr,
+		int bdev_per_ns, int ctrlr_id);
+static int nvme_library_init(void);
+static void
nvme_library_fini(void); +int nvme_queue_cmd(struct nvme_blockdev *bdev, struct nvme_blockio *bio, + int direction, void *buf, uint64_t nbytes, uint64_t offset); + +static int +nvme_get_ctx_size(void) +{ + return sizeof(struct nvme_blockio); +} + +SPDK_BDEV_MODULE_REGISTER(nvme_library_init, NULL, blockdev_nvme_get_spdk_running_config, + nvme_get_ctx_size) + +static int64_t +blockdev_nvme_read(struct nvme_blockdev *nbdev, struct nvme_blockio *bio, + void *buf, uint64_t nbytes, off_t offset) +{ + int64_t rc; + + SPDK_TRACELOG(SPDK_TRACE_NVME, "read %lu bytes with offset %#lx to %p\n", + nbytes, offset, buf); + + rc = nvme_queue_cmd(nbdev, bio, BDEV_DISK_READ, buf, nbytes, offset); + if (rc < 0) + return -1; + + return nbytes; +} + +static int64_t +blockdev_nvme_writev(struct nvme_blockdev *nbdev, struct nvme_blockio *bio, + struct iovec *iov, int iovcnt, size_t len, off_t offset) +{ + int64_t rc; + + if ((iovcnt != 1) || (iov->iov_len != len)) + return -1; + + SPDK_TRACELOG(SPDK_TRACE_NVME, "write %lu bytes with offset %#lx from %p\n", + iov->iov_len, offset, iov->iov_base); + + rc = nvme_queue_cmd(nbdev, bio, BDEV_DISK_WRITE, (void *)iov->iov_base, + iov->iov_len, offset); + if (rc < 0) + return -1; + + return iov->iov_len; +} + +static int +blockdev_nvme_check_io(struct spdk_bdev *bdev) +{ + struct nvme_blockdev *nbdev = (struct nvme_blockdev *)bdev; + + spdk_nvme_qpair_process_completions(nbdev->qpair, 0); + + return 0; +} + +static int +blockdev_nvme_destruct(struct spdk_bdev *bdev) +{ + return 0; +} + +static int +blockdev_nvme_flush(struct nvme_blockdev *nbdev, struct nvme_blockio *bio, + uint64_t offset, uint64_t nbytes) +{ + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); + + return 0; +} + +static int +blockdev_nvme_reset(struct nvme_blockdev *nbdev, struct nvme_blockio *bio) +{ + int rc; + enum spdk_bdev_io_status status; + + status = SPDK_BDEV_IO_STATUS_SUCCESS; + rc = spdk_nvme_ctrlr_reset(nbdev->ctrlr); + if (rc != 0) { + 
status = SPDK_BDEV_IO_STATUS_FAILED; + } + + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), status); + return rc; +} + +static int +blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct nvme_blockio *bio, + struct spdk_scsi_unmap_bdesc *umap_d, + uint16_t bdesc_count); + +static void blockdev_nvme_get_rbuf_cb(struct spdk_bdev_io *bdev_io) +{ + int ret; + + ret = blockdev_nvme_read((struct nvme_blockdev *)bdev_io->ctx, + (struct nvme_blockio *)bdev_io->driver_ctx, + bdev_io->u.read.buf, + bdev_io->u.read.nbytes, + bdev_io->u.read.offset); + + if (ret < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int _blockdev_nvme_submit_request(struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_rbuf(bdev_io, blockdev_nvme_get_rbuf_cb); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + return blockdev_nvme_writev((struct nvme_blockdev *)bdev_io->ctx, + (struct nvme_blockio *)bdev_io->driver_ctx, + bdev_io->u.write.iovs, + bdev_io->u.write.iovcnt, + bdev_io->u.write.len, + bdev_io->u.write.offset); + + case SPDK_BDEV_IO_TYPE_UNMAP: + return blockdev_nvme_unmap((struct nvme_blockdev *)bdev_io->ctx, + (struct nvme_blockio *)bdev_io->driver_ctx, + bdev_io->u.unmap.unmap_bdesc, + bdev_io->u.unmap.bdesc_count); + + case SPDK_BDEV_IO_TYPE_RESET: + return blockdev_nvme_reset((struct nvme_blockdev *)bdev_io->ctx, + (struct nvme_blockio *)bdev_io->driver_ctx); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return blockdev_nvme_flush((struct nvme_blockdev *)bdev_io->ctx, + (struct nvme_blockio *)bdev_io->driver_ctx, + bdev_io->u.flush.offset, + bdev_io->u.flush.length); + + default: + return -1; + } + return 0; +} + +static void blockdev_nvme_submit_request(struct spdk_bdev_io *bdev_io) +{ + if (_blockdev_nvme_submit_request(bdev_io) < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void blockdev_nvme_free_request(struct spdk_bdev_io *bdev_io) +{ +} + +static struct 
spdk_bdev_fn_table nvmelib_fn_table = { + .destruct = blockdev_nvme_destruct, + .check_io = blockdev_nvme_check_io, + .submit_request = blockdev_nvme_submit_request, + .free_request = blockdev_nvme_free_request, +}; + +struct nvme_probe_ctx { + int controllers_remaining; + int num_whitelist_controllers; + struct nvme_bdf_whitelist whitelist[NVME_MAX_CONTROLLERS]; +}; + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + uint16_t found_domain = spdk_pci_device_get_domain(pci_dev); + uint8_t found_bus = spdk_pci_device_get_bus(pci_dev); + uint8_t found_dev = spdk_pci_device_get_dev(pci_dev); + uint8_t found_func = spdk_pci_device_get_func(pci_dev); + int i; + bool claim_device = false; + + SPDK_NOTICELOG("Probing device %x:%x:%x.%x\n", + found_domain, found_bus, found_dev, found_func); + + if (ctx->controllers_remaining == 0) { + return false; + } + + if (ctx->num_whitelist_controllers == 0) { + claim_device = true; + } else { + for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { + if (found_domain == ctx->whitelist[i].domain && + found_bus == ctx->whitelist[i].bus && + found_dev == ctx->whitelist[i].dev && + found_func == ctx->whitelist[i].func) { + claim_device = true; + break; + } + } + } + + if (!claim_device) { + return false; + } + + if (spdk_pci_device_has_non_uio_driver(pci_dev)) { + /* NVMe kernel driver case */ + if (unbindfromkernel || ctx->num_whitelist_controllers > 0) { + if (spdk_pci_device_switch_to_uio_driver(pci_dev)) { + return false; + } + } else { + SPDK_WARNLOG("Device has kernel nvme driver attached, skipping...\n"); + return false; + } + } else { + if (spdk_pci_device_bind_uio_driver(pci_dev)) { + SPDK_WARNLOG("Device %s %d:%d:%d bind to uio driver failed\n", + spdk_pci_device_get_device_name(pci_dev), + spdk_pci_device_get_bus(pci_dev), + spdk_pci_device_get_dev(pci_dev), + spdk_pci_device_get_func(pci_dev)); + return false; + } + } + + /* Claim the 
device in case conflict with other process */ + if (spdk_pci_device_claim(pci_dev) != 0) { + return false; + } + + return true; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + struct nvme_device *dev; + + dev = malloc(sizeof(struct nvme_device)); + if (dev == NULL) { + SPDK_ERRLOG("Failed to allocate device struct\n"); + return; + } + + dev->ctrlr = ctrlr; + dev->id = nvme_controller_index++; + + nvme_ctrlr_initialize_blockdevs(dev->ctrlr, nvme_luns_per_ns, dev->id); + TAILQ_INSERT_TAIL(&g_nvme_devices, dev, tailq); + + if (ctx->controllers_remaining > 0) { + ctx->controllers_remaining--; + } +} + + +static int +nvme_library_init(void) +{ + struct spdk_conf_section *sp; + const char *val; + int i, rc; + struct nvme_probe_ctx probe_ctx; + + sp = spdk_conf_find_section(NULL, "Nvme"); + if (sp == NULL) { + /* + * If configuration file did not specify the Nvme section, do + * not take the time to initialize the NVMe devices. + */ + return 0; + } + + init_request_mempool(); + + nvme_luns_per_ns = spdk_conf_section_get_intval(sp, "NvmeLunsPerNs"); + if (nvme_luns_per_ns < 1) + nvme_luns_per_ns = 1; + + if (nvme_luns_per_ns > NVME_MAX_BLOCKDEVS_PER_CONTROLLER) { + SPDK_ERRLOG("The input value nvme_luns_per_ns(%d) exceeds the maximal " + "value(%d)\n", nvme_luns_per_ns, NVME_MAX_BLOCKDEVS_PER_CONTROLLER); + return -1; + } + + LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); + + if (LunSizeInMB < 0) + LunSizeInMB = 0; + + spdk_nvme_retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount"); + if (spdk_nvme_retry_count < 0) + spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT; + + /* + * If NumControllers is not found, this will return -1, which we + * will later use to denote that we should initialize all + * controllers. 
+ */ + num_controllers = spdk_conf_section_get_intval(sp, "NumControllers"); + + val = spdk_conf_section_get_val(sp, "UnbindFromKernel"); + if (val != NULL) { + if (!strcmp(val, "Yes")) { + unbindfromkernel = 1; + } + } + + /* Init the whitelist */ + probe_ctx.num_whitelist_controllers = 0; + + if (num_controllers > 0) { + for (i = 0; ; i++) { + unsigned int domain, bus, dev, func; + + val = spdk_conf_section_get_nmval(sp, "BDF", i, 0); + if (val == NULL) { + break; + } + + rc = sscanf(val, "%x:%x:%x.%x", &domain, &bus, &dev, &func); + if (rc != 4) { + SPDK_ERRLOG("Invalid format for BDF: %s\n", val); + return -1; + } + + probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].domain = domain; + probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].bus = bus; + probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].dev = dev; + probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].func = func; + + val = spdk_conf_section_get_nmval(sp, "BDF", i, 1); + if (val == NULL) { + SPDK_ERRLOG("BDF section with no device name\n"); + return -1; + } + + snprintf(probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].name, MAX_NVME_NAME_LENGTH, "%s", + val); + + probe_ctx.num_whitelist_controllers++; + } + } + + probe_ctx.controllers_remaining = num_controllers; + + if (spdk_nvme_probe(&probe_ctx, probe_cb, attach_cb, NULL)) { + return -1; + } + + return 0; +} + +__attribute__((destructor)) void +nvme_library_fini(void) +{ + struct nvme_device *dev; + + while (!TAILQ_EMPTY(&g_nvme_devices)) { + dev = TAILQ_FIRST(&g_nvme_devices); + TAILQ_REMOVE(&g_nvme_devices, dev, tailq); + spdk_nvme_detach(dev->ctrlr); + free(dev); + } +} + +void +nvme_ctrlr_initialize_blockdevs(struct spdk_nvme_ctrlr *ctrlr, int bdev_per_ns, int ctrlr_id) +{ + struct nvme_blockdev *bdev; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata; + uint64_t bdev_size, lba_offset, sectors_per_stripe; + int ns_id, num_ns, bdev_idx; + uint64_t LunSizeInsector; + + num_ns = 
spdk_nvme_ctrlr_get_num_ns(ctrlr); + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + for (ns_id = 1; ns_id <= num_ns; ns_id++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + bdev_size = spdk_nvme_ns_get_num_sectors(ns) / bdev_per_ns; + + /* + * Align each blockdev on a 1MB boundary - this helps cover Fultondale case + * where I/O that span a 128KB boundary must be split for optimal performance. + * Using a 1MB hardcoded boundary here so that we do not have to export + * stripe size information from the NVMe driver for now. + */ + sectors_per_stripe = (1 << 20) / spdk_nvme_ns_get_sector_size(ns); + + LunSizeInsector = ((uint64_t)LunSizeInMB << 20) / spdk_nvme_ns_get_sector_size(ns); + if ((LunSizeInMB > 0) && (LunSizeInsector < bdev_size)) + bdev_size = LunSizeInsector; + + bdev_size &= ~(sectors_per_stripe - 1); + + lba_offset = 0; + for (bdev_idx = 0; bdev_idx < bdev_per_ns; bdev_idx++) { + if (blockdev_index_max >= NVME_MAX_BLOCKDEVS) + return; + + bdev = &g_blockdev[blockdev_index_max]; + bdev->ctrlr = ctrlr; + bdev->ns = ns; + bdev->lba_start = lba_offset; + bdev->lba_end = lba_offset + bdev_size - 1; + lba_offset += bdev_size; + + snprintf(bdev->disk.name, SPDK_BDEV_MAX_NAME_LENGTH, + "Nvme%dn%dp%d", ctrlr_id, spdk_nvme_ns_get_id(ns), bdev_idx); + snprintf(bdev->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH, + "iSCSI NVMe disk"); + + bdev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, 0); + if (!bdev->qpair) { + SPDK_ERRLOG("Could not allocate I/O queue pair for %s\n", + bdev->disk.name); + continue; + } + + if (cdata->oncs.dsm) { + /* + * Enable the thin provisioning + * if nvme controller supports + * DataSet Management command. 
+ */ + bdev->disk.thin_provisioning = 1; + bdev->disk.max_unmap_bdesc_count = + NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT; + } + bdev->disk.write_cache = 1; + bdev->blocklen = spdk_nvme_ns_get_sector_size(ns); + bdev->disk.blocklen = bdev->blocklen; + bdev->disk.blockcnt = bdev->lba_end - bdev->lba_start + 1; + bdev->disk.ctxt = bdev; + bdev->disk.fn_table = &nvmelib_fn_table; + spdk_bdev_register(&bdev->disk); + + blockdev_index_max++; + } + } +} + +static void +queued_done(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_blockio *bio = ref; + enum spdk_bdev_io_status status; + + if (spdk_nvme_cpl_is_error(cpl)) { + status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + status = SPDK_BDEV_IO_STATUS_SUCCESS; + } + + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), status); +} + +int +nvme_queue_cmd(struct nvme_blockdev *bdev, struct nvme_blockio *bio, + int direction, void *buf, uint64_t nbytes, uint64_t offset) +{ + uint32_t ss = spdk_nvme_ns_get_sector_size(bdev->ns); + uint32_t lba_count; + uint64_t relative_lba = offset / bdev->blocklen; + uint64_t next_lba = relative_lba + bdev->lba_start; + int rc; + + if (nbytes % ss) { + SPDK_ERRLOG("Unaligned IO request length\n"); + return -1; + } + + + lba_count = nbytes / ss; + + if (direction == BDEV_DISK_READ) { + rc = spdk_nvme_ns_cmd_read(bdev->ns, bdev->qpair, buf, next_lba, + lba_count, queued_done, bio, 0); + } else { + rc = spdk_nvme_ns_cmd_write(bdev->ns, bdev->qpair, buf, next_lba, + lba_count, queued_done, bio, 0); + } + + if (rc != 0) { + SPDK_ERRLOG("IO failed\n"); + } + return rc; +} + +static int +blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct nvme_blockio *bio, + struct spdk_scsi_unmap_bdesc *unmap_d, + uint16_t bdesc_count) +{ + int rc = 0, i; + + for (i = 0; i < bdesc_count; i++) { + bio->dsm_range[i].starting_lba = + nbdev->lba_start + be64toh(unmap_d->lba); + bio->dsm_range[i].length = be32toh(unmap_d->block_count); + unmap_d++; + } + + rc = spdk_nvme_ns_cmd_deallocate(nbdev->ns, 
nbdev->qpair, bio->dsm_range, bdesc_count, + queued_done, bio); + + if (rc != 0) + return -1; + + return 0; +} + +struct rte_mempool *request_mempool; + +void init_request_mempool() +{ + request_mempool = rte_mempool_create("nvme request", 8192, + spdk_nvme_request_size(), + 128, 0, NULL, NULL, NULL, NULL, + SOCKET_ID_ANY, 0); +} + +static void +blockdev_nvme_get_spdk_running_config(FILE *fp) +{ + fprintf(fp, + "\n" + "# Users may change this to partition an NVMe namespace into multiple LUNs.\n" + "[Nvme]\n" + " UnbindFromKernel %s\n" + " NvmeLunsPerNs %d\n", + unbindfromkernel ? "Yes" : "No", + nvme_luns_per_ns); + if (num_controllers != -1) { + fprintf(fp, " NumControllers %d\n", num_controllers); + } + if (LunSizeInMB != 0) { + fprintf(fp, " LunSizeInMB %d\n", LunSizeInMB); + } +} + +SPDK_LOG_REGISTER_TRACE_FLAG("nvme", SPDK_TRACE_NVME) diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk new file mode 100644 index 000000000..3ce5771cd --- /dev/null +++ b/mk/spdk.modules.mk @@ -0,0 +1,17 @@ +BLOCKDEV_MODULES += $(SPDK_ROOT_DIR)/lib/bdev/malloc/libspdk_bdev_malloc.a + +BLOCKDEV_MODULES += $(SPDK_ROOT_DIR)/lib/bdev/nvme/libspdk_bdev_nvme.a \ + $(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a + +COPY_MODULES += $(SPDK_ROOT_DIR)/lib/copy/ioat/libspdk_copy_ioat.a \ + $(SPDK_ROOT_DIR)/lib/ioat/libspdk_ioat.a + +BLOCKDEV_MODULES_LINKER_ARGS = -Wl,--whole-archive \ + $(BLOCKDEV_MODULES) \ + -Wl,--no-whole-archive \ + $(BLOCKDEV_MODULES_DEPS) + +COPY_MODULES_LINKER_ARGS = -Wl,--whole-archive \ + $(COPY_MODULES) \ + -Wl,--no-whole-archive \ + $(COPY_MODULES_DEPS) diff --git a/test/lib/Makefile b/test/lib/Makefile index 2b376ee9d..a8994b3fd 100644 --- a/test/lib/Makefile +++ b/test/lib/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y = event log json jsonrpc nvme memory ioat +DIRS-y = bdev event log json jsonrpc nvme memory ioat DIRS-$(CONFIG_RDMA) += nvmf .PHONY: all clean $(DIRS-y) diff --git a/test/lib/bdev/Makefile b/test/lib/bdev/Makefile new file mode 100644 index 000000000..4b872ec34 --- /dev/null +++ b/test/lib/bdev/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y = bdevio bdevperf + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/test/lib/bdev/bdev.conf b/test/lib/bdev/bdev.conf new file mode 100644 index 000000000..754cd39d3 --- /dev/null +++ b/test/lib/bdev/bdev.conf @@ -0,0 +1,10 @@ +[Nvme] + NvmeLunsPerNs 1 + UnbindFromKernel Yes + +# autotest.sh will automatically rmmod ioatdma, so we do +# not need to specify UnbindFromKernel and Whitelist +# entries to enable ioat offload for this malloc LUN +[Malloc] + NumberOfLuns 5 + LunSizeInMB 32 diff --git a/test/lib/bdev/bdevio/.gitignore b/test/lib/bdev/bdevio/.gitignore new file mode 100644 index 000000000..1bb55429d --- /dev/null +++ b/test/lib/bdev/bdevio/.gitignore @@ -0,0 +1 @@ +bdevio diff --git a/test/lib/bdev/bdevio/Makefile b/test/lib/bdev/bdevio/Makefile new file mode 100644 index 000000000..675218804 --- /dev/null +++ b/test/lib/bdev/bdevio/Makefile @@ -0,0 +1,69 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = bdevio + +C_SRCS := bdevio.c + +CFLAGS += -I. $(DPDK_INC) + +SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/bdev/libspdk_bdev.a \ + $(SPDK_ROOT_DIR)/lib/copy/libspdk_copy.a \ + $(SPDK_ROOT_DIR)/lib/event/libspdk_event.a \ + $(SPDK_ROOT_DIR)/lib/trace/libspdk_trace.a \ + $(SPDK_ROOT_DIR)/lib/log/libspdk_log.a \ + $(SPDK_ROOT_DIR)/lib/conf/libspdk_conf.a \ + $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \ + $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a \ + $(SPDK_ROOT_DIR)/lib/rpc/libspdk_rpc.a \ + $(SPDK_ROOT_DIR)/lib/jsonrpc/libspdk_jsonrpc.a \ + $(SPDK_ROOT_DIR)/lib/json/libspdk_json.a + +LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ + $(COPY_MODULES_LINKER_ARGS) + +LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) $(DPDK_LIB) -lcunit + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIBS) $(BLOCKDEV_MODULES) $(LINKER_MODULES) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/test/lib/bdev/bdevio/bdevio.c b/test/lib/bdev/bdevio/bdevio.c new file mode 100644 index 000000000..6af44a1be --- /dev/null +++ 
b/test/lib/bdev/bdevio/bdevio.c @@ -0,0 +1,547 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "spdk/bdev.h" +#include "spdk/bdev_db.h" +#include "spdk/copy_engine.h" +#include "spdk/log.h" + +#include "CUnit/Basic.h" + +#define BUFFER_SIZE 260 * 1024 +#define BDEV_TASK_ARRAY_SIZE 2048 + +#include "../common.c" + +struct io_target { + struct spdk_bdev *bdev; + struct io_target *next; +}; + +struct io_target *g_io_targets = NULL; + +static int +bdevio_construct_targets(void) +{ + struct blockdev_entry *bdev_entry = g_bdevs; + struct spdk_bdev *bdev; + struct io_target *target; + + while (bdev_entry != NULL) { + bdev = bdev_entry->bdev; + + if (bdev->claimed) { + bdev_entry = bdev_entry->next; + continue; + } + + target = malloc(sizeof(struct io_target)); + if (target == NULL) { + return -ENOMEM; + } + target->bdev = bdev; + target->next = g_io_targets; + g_io_targets = target; + bdev_entry = bdev_entry->next; + } + + return 0; +} + +static int complete; +static enum spdk_bdev_io_status completion_status_per_io; + +static void +initialize_buffer(char **buf, int pattern, int size) +{ + *buf = rte_malloc(NULL, size, 0x1000); + memset(*buf, pattern, size); +} + +static void +quick_test_complete(spdk_event_t event) +{ + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + + completion_status_per_io = bdev_io->status; + complete = 1; + + spdk_bdev_free_io(bdev_io); +} + +static int +check_io_completion(void) +{ + int rc; + struct blockdev_entry *bdev_entry; + + rc = 0; + while (!complete) { + bdev_entry = g_bdevs; + while (bdev_entry != NULL) { + spdk_bdev_do_work(bdev_entry->bdev); + bdev_entry = bdev_entry->next; + } + spdk_event_queue_run_all(rte_lcore_id()); + } + return rc; +} + +struct iovec iov; + +static int +blockdev_write(struct io_target *target, void *bdev_task_ctx, char **tx_buf, + int data_len, uint64_t offset) +{ + struct spdk_bdev_io *bdev_io; + + complete = 0; + completion_status_per_io = SPDK_BDEV_IO_STATUS_FAILED; + + iov.iov_base = 
*tx_buf; + iov.iov_len = data_len; + bdev_io = spdk_bdev_writev(target->bdev, &iov, 1, iov.iov_len, + (uint64_t)offset, quick_test_complete, + bdev_task_ctx); + if (!bdev_io) { + return -1; + } + + return data_len; +} + +static int +blockdev_read(struct io_target *target, void *bdev_task_ctx, char **rx_buf, + int data_len, uint64_t offset) +{ + struct spdk_bdev_io *bdev_io; + + complete = 0; + completion_status_per_io = SPDK_BDEV_IO_STATUS_FAILED; + + bdev_io = spdk_bdev_read(target->bdev, *rx_buf, data_len, offset, + quick_test_complete, bdev_task_ctx); + + if (!bdev_io) { + return -1; + } + + return data_len; +} + +static int +blockdev_write_read_data_match(char **rx_buf, char **tx_buf, int data_length) +{ + int rc; + rc = memcmp(*rx_buf, *tx_buf, data_length); + + rte_free(*rx_buf); + rte_free(*tx_buf); + + return rc; +} + +static void +blockdev_write_read(uint32_t data_length, int pattern, uint64_t offset, + int expected_rc) +{ + struct io_target *target; + char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE]; + char *tx_buf = NULL; + char *rx_buf = NULL; + int rc; + + target = g_io_targets; + while (target != NULL) { + if (data_length < target->bdev->blocklen) { + target = target->next; + continue; + } + + initialize_buffer(&tx_buf, pattern, data_length); + initialize_buffer(&rx_buf, 0, data_length); + + rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf, + data_length, offset); + + /* Assert the rc of the respective blockdev */ + CU_ASSERT_EQUAL(rc, expected_rc); + + /* If the write was successful, the function returns the data_length + * and the completion_status_per_io is 0 */ + if (rc < (int)data_length) { + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + check_io_completion(); + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } + + rc = blockdev_read(target, (void *)bdev_task_ctx, &rx_buf, + data_length, offset); + + /* Assert the rc of the respective blockdev */ + CU_ASSERT_EQUAL(rc, expected_rc); 
+ + /* If the read was successful, the function returns the data_length + * and the completion_status_per_io is 0 */ + if (rc < (int)data_length) { + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + check_io_completion(); + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } + + if (completion_status_per_io == SPDK_BDEV_IO_STATUS_SUCCESS) { + rc = blockdev_write_read_data_match(&rx_buf, &tx_buf, data_length); + /* Assert the write by comparing it with values read + * from each blockdev */ + CU_ASSERT_EQUAL(rc, 0); + } + + target = target->next; + } +} + +static void +blockdev_write_read_4k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 4K */ + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is the data_length */ + expected_rc = data_length; + + blockdev_write_read(data_length, pattern, offset, expected_rc); +} + +static void +blockdev_write_read_512Bytes(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 512 */ + data_length = 512; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 2048; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is the data_length */ + expected_rc = data_length; + + blockdev_write_read(data_length, pattern, offset, expected_rc); +} + +static void +blockdev_write_read_size_gt_128k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 132K */ + data_length = 135168; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 2048; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is the data_length */ + expected_rc = data_length; + + 
blockdev_write_read(data_length, pattern, offset, expected_rc); +} + +static void +blockdev_write_read_invalid_size(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size is not a multiple of the block size */ + data_length = 0x1015; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 2048; + pattern = 0xA3; + /* Params are invalid, hence the expected return value + * of write and read for all blockdevs is < 0 */ + expected_rc = -1; + + blockdev_write_read(data_length, pattern, offset, expected_rc); +} + +static void +blockdev_write_read_offset_plus_nbytes_equals_bdev_size(void) +{ + struct io_target *target; + struct spdk_bdev *bdev; + char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE]; + char *tx_buf = NULL; + char *rx_buf = NULL; + uint64_t offset; + int rc; + + target = g_io_targets; + while (target != NULL) { + bdev = target->bdev; + + /* The start offset has been set to a marginal value + * such that offset + nbytes == Total size of + * blockdev. 
*/ + offset = ((bdev->blockcnt - 1) * bdev->blocklen); + + initialize_buffer(&tx_buf, 0xA3, bdev->blocklen); + initialize_buffer(&rx_buf, 0, bdev->blocklen); + + rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf, + bdev->blocklen, offset); + + /* Assert the rc of the respective blockdev */ + CU_ASSERT_EQUAL(rc, (int)bdev->blocklen); + + /* If the write was successful, the function returns the data_length + * and the completion_status_per_io is 0 */ + check_io_completion(); + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS); + + rc = blockdev_read(target, (void *)bdev_task_ctx, &rx_buf, + bdev->blocklen, offset); + + /* Assert the rc of the respective blockdev */ + CU_ASSERT_EQUAL(rc, (int)bdev->blocklen); + + /* If the read was successful, the function returns the data_length + * and the completion_status_per_io is 0 */ + check_io_completion(); + CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS); + + rc = blockdev_write_read_data_match(&rx_buf, &tx_buf, bdev->blocklen); + /* Assert the write by comparing it with values read + * from each blockdev */ + CU_ASSERT_EQUAL(rc, 0); + + target = target->next; + } +} + +static void +blockdev_write_read_offset_plus_nbytes_gt_bdev_size(void) +{ + struct io_target *target; + struct spdk_bdev *bdev; + char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE]; + char *tx_buf = NULL; + char *rx_buf = NULL; + int data_length; + uint64_t offset; + int pattern; + int expected_rc; + int rc; + + /* Tests the overflow condition of the blockdevs. */ + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + pattern = 0xA3; + /* Params are invalid, hence the expected return value + * of write and read is < 0.*/ + expected_rc = -1; + + target = g_io_targets; + while (target != NULL) { + bdev = target->bdev; + + /* The start offset has been set to a valid value + * but offset + nbytes is greater than the Total size + * of the blockdev. The test should fail. 
*/ + offset = ((bdev->blockcnt * bdev->blocklen) - 1024); + + initialize_buffer(&tx_buf, pattern, data_length); + initialize_buffer(&rx_buf, 0, data_length); + + rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf, + data_length, offset); + + /* Assert the rc of the respective blockdev */ + CU_ASSERT_EQUAL(rc, expected_rc); + + /* If the write failed, the function returns rcnext; + } +} + +static void +blockdev_write_read_max_offset(void) +{ + int data_length; + uint64_t offset; + int pattern; + int expected_rc; + + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + /* The start offset has been set to UINT64_MAX such that + * adding nbytes wraps around and points to an invalid address. */ + offset = UINT64_MAX; + pattern = 0xA3; + /* Params are invalid, hence the expected return value + * of write and read for all blockdevs is < 0 */ + expected_rc = -1; + + blockdev_write_read(data_length, pattern, offset, expected_rc); +} + +static void +blockdev_overlapped_write_read_8k(void) +{ + int data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 8K */ + data_length = 8192; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is the data_length */ + expected_rc = data_length; + /* Assert the write by comparing it with values read + * from the same offset for each blockdev */ + blockdev_write_read(data_length, pattern, offset, expected_rc); + + /* Overwrite the pattern 0xbb of size 8K on an address offset overlapping + * with the address written above and assert the new value in + * the overlapped address range */ + /* Populate 8k with value 0xBB */ + pattern = 0xBB; + /* Offset = 6144; Overlap offset addresses and write value 0xbb */ + offset = 4096; + /* Assert the write by comparing it with values read + * from the overlapped offset for each blockdev */ + blockdev_write_read(data_length, pattern, 
offset, expected_rc); +} + + +int +main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + const char *config_file; + unsigned int num_failures; + + if (argc == 1) { + config_file = "/usr/local/etc/spdk/iscsi.conf"; + } else { + config_file = argv[1]; + } + + bdevtest_init(config_file, "0x1"); + + if (bdevio_construct_targets() < 0) { + return 1; + } + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("components_suite", NULL, NULL); + if (suite == NULL) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if ( + CU_add_test(suite, "blockdev write read 4k", blockdev_write_read_4k) == NULL + || CU_add_test(suite, "blockdev write read 512 bytes", + blockdev_write_read_512Bytes) == NULL + || CU_add_test(suite, "blockdev write read size > 128k", + blockdev_write_read_size_gt_128k) == NULL + || CU_add_test(suite, "blockdev write read invalid size", + blockdev_write_read_invalid_size) == NULL + || CU_add_test(suite, "blockdev write read offset + nbytes == size of blockdev", + blockdev_write_read_offset_plus_nbytes_equals_bdev_size) == NULL + || CU_add_test(suite, "blockdev write read offset + nbytes > size of blockdev", + blockdev_write_read_offset_plus_nbytes_gt_bdev_size) == NULL + || CU_add_test(suite, "blockdev write read max offset", + blockdev_write_read_max_offset) == NULL + || CU_add_test(suite, "blockdev write read 8k on overlapped address offset", + blockdev_overlapped_write_read_8k) == NULL + ) { + CU_cleanup_registry(); + return CU_get_error(); + } + + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/test/lib/bdev/bdevperf/.gitignore b/test/lib/bdev/bdevperf/.gitignore new file mode 100644 index 000000000..e14ddd841 --- /dev/null +++ b/test/lib/bdev/bdevperf/.gitignore @@ -0,0 +1 @@ +bdevperf diff --git a/test/lib/bdev/bdevperf/Makefile b/test/lib/bdev/bdevperf/Makefile new file 
mode 100644 index 000000000..5a463662f --- /dev/null +++ b/test/lib/bdev/bdevperf/Makefile @@ -0,0 +1,69 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = bdevperf + +C_SRCS := bdevperf.c + +CFLAGS += -I. 
$(DPDK_INC) + +SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/bdev/libspdk_bdev.a \ + $(SPDK_ROOT_DIR)/lib/copy/libspdk_copy.a \ + $(SPDK_ROOT_DIR)/lib/event/libspdk_event.a \ + $(SPDK_ROOT_DIR)/lib/trace/libspdk_trace.a \ + $(SPDK_ROOT_DIR)/lib/log/libspdk_log.a \ + $(SPDK_ROOT_DIR)/lib/conf/libspdk_conf.a \ + $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \ + $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a \ + $(SPDK_ROOT_DIR)/lib/rpc/libspdk_rpc.a \ + $(SPDK_ROOT_DIR)/lib/jsonrpc/libspdk_jsonrpc.a \ + $(SPDK_ROOT_DIR)/lib/json/libspdk_json.a + +LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ + $(COPY_MODULES_LINKER_ARGS) + +LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) $(DPDK_LIB) + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIBS) $(BLOCKDEV_MODULES) $(COPY_MODULES) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/test/lib/bdev/bdevperf/bdevperf.c b/test/lib/bdev/bdevperf/bdevperf.c new file mode 100644 index 000000000..a9148f4da --- /dev/null +++ b/test/lib/bdev/bdevperf/bdevperf.c @@ -0,0 +1,684 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spdk/bdev.h" +#include "spdk/bdev_db.h" +#include "spdk/copy_engine.h" +#include "spdk/log.h" + +struct bdevperf_task { + struct iovec iov; + struct io_target *target; + void *buf; +}; + +static int g_io_size = 0; +/* initialize to invalid value so we can detect if user overrides it. 
*/ +static int g_rw_percentage = -1; +static int g_is_random; +static bool g_verify = false; +static bool g_reset = false; +static bool g_unmap = false; +static int g_queue_depth; +static int g_time_in_sec; +static int g_show_performance_real_time = 0; +static bool g_run_failed = false; +static bool g_zcopy = true; + +static struct rte_timer g_perf_timer; + +static void bdevperf_submit_single(struct io_target *target); + +#include "../common.c" + +struct io_target { + struct spdk_bdev *bdev; + struct io_target *next; + unsigned lcore; + int io_completed; + int current_queue_depth; + uint64_t size_in_ios; + uint64_t offset_in_ios; + bool is_draining; + struct rte_timer run_timer; + struct rte_timer reset_timer; +}; + +struct io_target *head[RTE_MAX_LCORE]; +static int g_target_count = 0; + +/* + * Used to determine how the I/O buffers should be aligned. + * This alignment will be bumped up for blockdevs that + * require alignment based on block length - for example, + * AIO blockdevs. + */ +static uint32_t g_min_alignment = 8; + +static void +blockdev_heads_init(void) +{ + int i; + + for (i = 0; i < RTE_MAX_LCORE; i++) { + head[i] = NULL; + } +} + +static void +bdevperf_construct_targets(void) +{ + int index = 0; + struct blockdev_entry *bdev_entry = g_bdevs; + struct spdk_bdev *bdev; + struct io_target *target; + + while (bdev_entry != NULL) { + bdev = bdev_entry->bdev; + + if (bdev->claimed) { + bdev_entry = bdev_entry->next; + continue; + } + + if (g_unmap && !bdev->thin_provisioning) { + printf("Skipping %s because it does not support unmap\n", bdev->name); + bdev_entry = bdev_entry->next; + continue; + } + + target = malloc(sizeof(struct io_target)); + if (!target) { + fprintf(stderr, "Unable to allocate memory for new target.\n"); + /* Return immediately because all mallocs will presumably fail after this */ + return; + } + target->bdev = bdev; + /* Mapping each target to lcore */ + index = g_target_count % spdk_app_get_core_count(); + target->next = 
head[index]; + target->lcore = index; + target->io_completed = 0; + target->current_queue_depth = 0; + target->offset_in_ios = 0; + target->size_in_ios = (bdev->blockcnt * bdev->blocklen) / + g_io_size; + if (bdev->need_aligned_buffer && g_min_alignment < bdev->blocklen) { + g_min_alignment = bdev->blocklen; + } + + target->is_draining = false; + rte_timer_init(&target->run_timer); + rte_timer_init(&target->reset_timer); + + head[index] = target; + g_target_count++; + bdev_entry = bdev_entry->next; + } +} + +static void +end_run(spdk_event_t event) +{ + if (--g_target_count == 0) { + if (g_show_performance_real_time) { + rte_timer_stop_sync(&g_perf_timer); + } + spdk_app_stop(0); + } +} + +struct rte_mempool *task_pool; + +static void +bdevperf_complete(spdk_event_t event) +{ + struct io_target *target; + struct bdevperf_task *task = spdk_event_get_arg1(event); + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + spdk_event_t complete; + + if (bdev_io->status != SPDK_BDEV_IO_STATUS_SUCCESS) { + g_run_failed = true; + } else if (g_verify || g_reset || g_unmap) { + if (memcmp(task->buf, bdev_io->u.read.buf, g_io_size) != 0) { + printf("Buffer mismatch! Disk Offset: %lu\n", bdev_io->u.read.offset); + g_run_failed = true; + } + } + + target = task->target; + target->current_queue_depth--; + target->io_completed++; + + bdev_io->caller_ctx = NULL; + rte_mempool_put(task_pool, task); + + spdk_bdev_free_io(bdev_io); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+ */ + if (!target->is_draining) { + bdevperf_submit_single(target); + } else if (target->current_queue_depth == 0) { + complete = spdk_event_allocate(rte_get_master_lcore(), end_run, NULL, NULL, NULL); + spdk_event_call(complete); + } +} + +static void +bdevperf_unmap_complete(spdk_event_t event) +{ + struct io_target *target; + struct bdevperf_task *task = spdk_event_get_arg1(event); + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + + target = task->target; + + /* Set the expected buffer to 0. */ + memset(task->buf, 0, g_io_size); + + /* Read the data back in */ + spdk_bdev_read(target->bdev, NULL, + be32toh(bdev_io->u.unmap.unmap_bdesc->block_count) * target->bdev->blocklen, + be64toh(bdev_io->u.unmap.unmap_bdesc->lba) * target->bdev->blocklen, + bdevperf_complete, task); + + free(bdev_io->u.unmap.unmap_bdesc); + spdk_bdev_free_io(bdev_io); + +} + +static void +bdevperf_verify_write_complete(spdk_event_t event) +{ + struct io_target *target; + struct bdevperf_task *task = spdk_event_get_arg1(event); + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + + target = task->target; + + if (g_unmap) { + /* Unmap the data */ + struct spdk_scsi_unmap_bdesc *bdesc = calloc(1, sizeof(*bdesc)); + if (bdesc == NULL) { + fprintf(stderr, "memory allocation failure\n"); + exit(1); + } + + bdesc->lba = htobe64(bdev_io->u.write.offset / target->bdev->blocklen); + bdesc->block_count = htobe32(bdev_io->u.write.len / target->bdev->blocklen); + + spdk_bdev_unmap(target->bdev, bdesc, 1, bdevperf_unmap_complete, + task); + } else { + /* Read the data back in */ + spdk_bdev_read(target->bdev, NULL, + bdev_io->u.write.len, + bdev_io->u.write.offset, + bdevperf_complete, task); + } + + spdk_bdev_free_io(bdev_io); +} + +static void +task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id) +{ + struct bdevperf_task *task = __task; + + task->buf = rte_malloc(NULL, g_io_size, g_min_alignment); +} + +static __thread unsigned int seed = 0; + +static void 
+bdevperf_submit_single(struct io_target *target) +{ + struct spdk_bdev *bdev; + struct bdevperf_task *task = NULL; + uint64_t offset_in_ios; + void *rbuf; + + bdev = target->bdev; + + if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) { + printf("Task pool allocation failed\n"); + abort(); + } + + task->target = target; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % target->size_in_ios; + } else { + offset_in_ios = target->offset_in_ios++; + if (target->offset_in_ios == target->size_in_ios) { + target->offset_in_ios = 0; + } + } + + if (g_verify || g_reset || g_unmap) { + memset(task->buf, rand_r(&seed) % 256, g_io_size); + task->iov.iov_base = task->buf; + task->iov.iov_len = g_io_size; + spdk_bdev_writev(bdev, &task->iov, 1, g_io_size, + offset_in_ios * g_io_size, + bdevperf_verify_write_complete, task); + } else if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { + rbuf = g_zcopy ? NULL : task->buf; + spdk_bdev_read(bdev, rbuf, g_io_size, + offset_in_ios * g_io_size, + bdevperf_complete, task); + } else { + task->iov.iov_base = task->buf; + task->iov.iov_len = g_io_size; + spdk_bdev_writev(bdev, &task->iov, 1, g_io_size, + offset_in_ios * g_io_size, + bdevperf_complete, task); + } + + target->current_queue_depth++; +} + +static void +bdevperf_submit_io(struct io_target *target, int queue_depth) +{ + while (queue_depth-- > 0) { + bdevperf_submit_single(target); + } +} + +static void +end_target(struct rte_timer *timer, void *arg) +{ + struct io_target *target = arg; + + if (g_reset) { + rte_timer_stop_sync(&target->reset_timer); + } + + target->is_draining = true; +} + +static void reset_target(struct rte_timer *timer, void *arg); + +static void +reset_cb(spdk_event_t event) +{ + struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event); + int status = bdev_io->status; + struct bdevperf_task *task = bdev_io->caller_ctx; + struct io_target *target = task->target; + + if (status != 
SPDK_BDEV_IO_STATUS_SUCCESS) { + printf("Reset blockdev=%s failed\n", target->bdev->name); + g_run_failed = true; + } + + rte_mempool_put(task_pool, task); + + rte_timer_reset(&target->reset_timer, rte_get_timer_hz() * 10, SINGLE, + target->lcore, reset_target, target); +} + +static void +reset_target(struct rte_timer *timer, void *arg) +{ + struct io_target *target = arg; + struct bdevperf_task *task = NULL; + + /* Do reset. */ + rte_mempool_get(task_pool, (void **)&task); + task->target = target; + spdk_bdev_reset(target->bdev, SPDK_BDEV_RESET_SOFT, + reset_cb, task); +} + +static void +bdevperf_submit_on_core(spdk_event_t event) +{ + struct io_target *target = spdk_event_get_arg1(event); + + /* Submit initial I/O for each block device. Each time one + * completes, another will be submitted. */ + while (target != NULL) { + /* Start a timer to stop this I/O chain when the run is over */ + rte_timer_reset(&target->run_timer, rte_get_timer_hz() * g_time_in_sec, SINGLE, + target->lcore, end_target, target); + if (g_reset) { + rte_timer_reset(&target->reset_timer, rte_get_timer_hz() * 10, SINGLE, + target->lcore, reset_target, target); + } + bdevperf_submit_io(target, g_queue_depth); + target = target->next; + } +} + +static void usage(char *program_name) +{ + printf("%s options\n", program_name); + printf("\t[-c configuration file]\n"); + printf("\t[-m core mask for distributing I/O submission/completion work\n"); + printf("\t\t(default: 0x1 - use core 0 only)]\n"); + printf("\t[-q io depth]\n"); + printf("\t[-s io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw, verify, reset)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-S Show performance result in real time]\n"); +} + +static void +performance_dump(int io_time) +{ + int index; + unsigned lcore_id; + float io_per_second, mb_per_second; + float 
total_io_per_second, total_mb_per_second; + struct io_target *target; + + total_io_per_second = 0; + total_mb_per_second = 0; + for (index = 0; index < spdk_app_get_core_count(); index++) { + target = head[index]; + if (target != NULL) { + lcore_id = target->lcore; + printf("\r Logical core: %d\n", lcore_id); + } + while (target != NULL) { + io_per_second = (float)target->io_completed / + io_time; + mb_per_second = io_per_second * g_io_size / + (1024 * 1024); + printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n", + target->bdev->name, io_per_second, + mb_per_second); + total_io_per_second += io_per_second; + total_mb_per_second += mb_per_second; + target = target->next; + } + } + + printf("\r =====================================================\n"); + printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n", + "Total", total_io_per_second, total_mb_per_second); + fflush(stdout); + +} + +static void +performance_statistics_thread(struct rte_timer *timer, void *arg) +{ + performance_dump(1); +} + +static void +bdevperf_run(spdk_event_t evt) +{ + int i; + struct io_target *target; + spdk_event_t event; + + printf("Running I/O for %d seconds...\n", g_time_in_sec); + fflush(stdout); + + /* Start a timer to dump performance numbers */ + if (g_show_performance_real_time) { + rte_timer_init(&g_perf_timer); + rte_timer_reset(&g_perf_timer, rte_get_timer_hz(), PERIODICAL, + rte_get_master_lcore(), performance_statistics_thread, NULL); + } + + /* Send events to start all I/O */ + RTE_LCORE_FOREACH(i) { + if (spdk_app_get_core_mask() & (1ULL << i)) { + target = head[i]; + if (target != NULL) { + event = spdk_event_allocate(target->lcore, bdevperf_submit_on_core, + target, NULL, NULL); + spdk_event_call(event); + } + } + } +} + +int +main(int argc, char **argv) +{ + const char *config_file; + const char *core_mask; + const char *workload_type; + int op; + bool mix_specified; + + /* default value*/ + config_file = NULL; + g_queue_depth = 0; + g_io_size = 0; + workload_type = NULL; + g_time_in_sec 
= 0; + mix_specified = false; + core_mask = NULL; + + while ((op = getopt(argc, argv, "c:m:q:s:t:w:M:S")) != -1) { + switch (op) { + case 'c': + config_file = optarg; + break; + case 'm': + core_mask = optarg; + break; + case 'q': + g_queue_depth = atoi(optarg); + break; + case 's': + g_io_size = atoi(optarg); + break; + case 't': + g_time_in_sec = atoi(optarg); + break; + case 'w': + workload_type = optarg; + break; + case 'M': + g_rw_percentage = atoi(optarg); + mix_specified = true; + break; + case 'S': + g_show_performance_real_time = 1; + break; + default: + usage(argv[0]); + exit(1); + } + } + + if (!config_file) { + usage(argv[0]); + exit(1); + } + if (!g_queue_depth) { + usage(argv[0]); + exit(1); + } + if (!g_io_size) { + usage(argv[0]); + exit(1); + } + if (!workload_type) { + usage(argv[0]); + exit(1); + } + if (!g_time_in_sec) { + usage(argv[0]); + exit(1); + } + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw") && + strcmp(workload_type, "verify") && + strcmp(workload_type, "reset") && + strcmp(workload_type, "unmap")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)\n"); + exit(1); + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_rw_percentage = 100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_rw_percentage = 0; + } + + if (!strcmp(workload_type, "verify") || + !strcmp(workload_type, "reset") || + !strcmp(workload_type, "unmap")) { + g_rw_percentage = 50; + if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) { + fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n", + SPDK_BDEV_LARGE_RBUF_MAX_SIZE, g_io_size); + exit(1); + } + if (core_mask) { + fprintf(stderr, "Ignoring -m option. 
Verify can only run with a single core.\n"); + core_mask = NULL; + } + g_verify = true; + if (!strcmp(workload_type, "reset")) { + g_reset = true; + } + if (!strcmp(workload_type, "unmap")) { + g_unmap = true; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite") || + !strcmp(workload_type, "verify") || + !strcmp(workload_type, "reset") || + !strcmp(workload_type, "unmap")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + exit(1); + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw") || + !strcmp(workload_type, "verify") || + !strcmp(workload_type, "reset") || + !strcmp(workload_type, "unmap")) { + g_is_random = 0; + } else { + g_is_random = 1; + } + + if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) { + fprintf(stdout, "I/O size of %d is greather than zero copy threshold (%d).\n", + g_io_size, SPDK_BDEV_LARGE_RBUF_MAX_SIZE); + fprintf(stdout, "Zero copy mechanism will not be used.\n"); + g_zcopy = false; + } + + optind = 1; /*reset the optind */ + + rte_set_log_level(RTE_LOG_ERR); + + blockdev_heads_init(); + + bdevtest_init(config_file, core_mask); + + bdevperf_construct_targets(); + + if (g_bdevs == NULL) { + printf("No blockdevs available.\n"); + return 1; + } + + task_pool = rte_mempool_create("task_pool", 4096 * spdk_app_get_core_count(), + sizeof(struct bdevperf_task), + 64, 0, NULL, NULL, task_ctor, NULL, + SOCKET_ID_ANY, 0); + + spdk_app_start(bdevperf_run, NULL, NULL); + + performance_dump(g_time_in_sec); + spdk_app_fini(); + printf("done.\n"); + return 0; +} diff --git 
a/test/lib/bdev/blockdev.sh b/test/lib/bdev/blockdev.sh new file mode 100755 index 000000000..cf6d0af9c --- /dev/null +++ b/test/lib/bdev/blockdev.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -e + +testdir=$(readlink -f $(dirname $0)) +rootdir=$testdir/../../.. +source $rootdir/scripts/autotest_common.sh + +testdir=$(readlink -f $(dirname $0)) + +timing_enter blockdev + +timing_enter bounds +$testdir/bdevio/bdevio $testdir/bdev.conf +process_core +timing_exit bounds + +timing_enter verify +$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 32 -s 4096 -w verify -t 5 +process_core +timing_exit verify + +# Use size 192KB which both exceeds typical 128KB max NVMe I/O +# size and will cross 128KB Intel DC P3700 stripe boundaries. +timing_enter perf +$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 128 -w read -s 196608 -t 5 +process_core +timing_exit perf + +if [ $RUN_NIGHTLY -eq 1 ]; then + timing_enter reset + $testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 16 -w reset -s 4096 -t 60 + process_core + timing_exit reset + + timing_enter unmap + $testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 1 -w unmap -s 4096 -t 60 + process_core + timing_exit unmap +fi + +timing_exit blockdev diff --git a/test/lib/bdev/common.c b/test/lib/bdev/common.c new file mode 100644 index 000000000..c7b45285b --- /dev/null +++ b/test/lib/bdev/common.c @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This file is included in the bdev test tools, not compiled separately. 
*/ + +#include "spdk/event.h" + +struct blockdev_entry { + struct spdk_bdev *bdev; + struct blockdev_entry *next; +}; + +struct blockdev_entry *g_bdevs = NULL; + +int +spdk_bdev_db_add(struct spdk_bdev *bdev) +{ + struct blockdev_entry *bdev_entry = calloc(1, sizeof(struct blockdev_entry)); + + if (bdev_entry == NULL) { + return -ENOMEM; + } + + bdev_entry->bdev = bdev; + + bdev_entry->next = g_bdevs; + g_bdevs = bdev_entry; + + return 0; +} + +int +spdk_bdev_db_delete(struct spdk_bdev *bdev) +{ + /* Deleting is not important */ + return 0; +} + +struct spdk_bdev * +spdk_bdev_db_get_by_name(const char *bdev_name) +{ + struct blockdev_entry *bdev_entry = g_bdevs; + + while (bdev_entry != NULL) { + if (strcmp(bdev_name, bdev_entry->bdev->name) == 0) { + return bdev_entry->bdev; + } + bdev_entry = bdev_entry->next; + } + + return NULL; +} + +static void +bdevtest_init(const char *config_file, const char *cpumask) +{ + struct spdk_app_opts opts; + + spdk_app_opts_init(&opts); + opts.name = "bdevtest"; + opts.config_file = config_file; + opts.reactor_mask = cpumask; + spdk_app_init(&opts); +}