bdev: add block device abstraction layer

Change-Id: I235cf146a52714756c9782c03b118f518c5f5182
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Daniel Verkamp 2016-07-20 11:16:23 -07:00
parent b9f3538e5c
commit 861e78bf48
26 changed files with 4260 additions and 2 deletions

View File

@ -54,6 +54,7 @@ timing_exit nvmf_setup
timing_enter lib
time test/lib/bdev/blockdev.sh
time test/lib/event/event.sh
time test/lib/nvme/nvme.sh
time test/lib/nvmf/nvmf.sh

431
include/spdk/bdev.h Normal file
View File

@ -0,0 +1,431 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* Block device abstraction layer
*/
#ifndef SPDK_BDEV_H_
#define SPDK_BDEV_H_
#include <inttypes.h>
#include <unistd.h>
#include <stddef.h> /* for offsetof */
#include <sys/uio.h> /* for struct iovec */
#include <stdbool.h>
#include "spdk/event.h"
#include "spdk/queue.h"
#include "spdk/scsi_spec.h"
#define SPDK_BDEV_SMALL_RBUF_MAX_SIZE 8192
#define SPDK_BDEV_LARGE_RBUF_MAX_SIZE (64 * 1024)
#define SPDK_BDEV_MAX_NAME_LENGTH 16
#define SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH 50
struct spdk_bdev_io;
/** \page block_backend_modules Block Device Backend Modules
To implement a backend block device driver, a number of functions
dictated by struct spdk_bdev_fn_table must be provided.
The module should register itself using SPDK_BDEV_MODULE_REGISTER or
SPDK_VBDEV_MODULE_REGISTER to define the parameters for the module.
Use SPDK_BDEV_MODULE_REGISTER for all block backends that are real disks.
Any virtual backends such as RAID, partitioning, etc. should use
SPDK_VBDEV_MODULE_REGISTER.
<hr>
In the module initialization code, the config file sections can be parsed to
acquire custom configuration parameters. For example, if the config file has
a section such as below:
<blockquote><pre>
[MyBE]
MyParam 1234
</pre></blockquote>
The value can be extracted as the example below:
<blockquote><pre>
struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "MyBE");
int my_param = spdk_conf_section_get_intval(sp, "MyParam");
</pre></blockquote>
The backend initialization routine also needs to create "disks". A virtual
representation of each LUN must be constructed. Mainly a struct spdk_bdev
must be passed to the bdev database via spdk_bdev_register().
*/
/**
* \brief SPDK block device.
*
* This is a virtual representation of a block device that is exported by the backend.
*/
struct spdk_bdev {
	/** User context passed in by the backend */
	void *ctxt;

	/** Unique name for this block device. */
	char name[SPDK_BDEV_MAX_NAME_LENGTH];

	/** Unique product name for this kind of block device. */
	char product_name[SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH];

	/** Size in bytes of a logical block for the backend */
	uint64_t blocklen;

	/** Number of blocks */
	uint64_t blockcnt;

	/** write cache enabled, not used at the moment */
	int write_cache;

	/**
	 * This is used to make sure buffers are sector aligned.
	 * This causes double buffering on writes.
	 */
	int need_aligned_buffer;

	/** thin provisioning, not used at the moment */
	int thin_provisioning;

	/** function table for all LUN ops */
	struct spdk_bdev_fn_table *fn_table;

	/** Represents maximum unmap block descriptor count */
	uint32_t max_unmap_bdesc_count;

	/** array of child block dev that is underneath of the current dev */
	struct spdk_bdev **child_bdevs;

	/** number of child blockdevs allocated */
	int num_child_bdevs;

	/**
	 * Generation value used by block device reset.  Each new I/O captures
	 * the current value in spdk_bdev_io::gencnt at init time.
	 * NOTE(review): where this is incremented is outside this view — confirm.
	 */
	uint32_t gencnt;

	/** Whether the poller is registered with the reactor
	 *  (set lazily on first I/O submission). */
	bool is_running;

	/** Poller to submit IO and check completion */
	struct spdk_poller poller;

	/** True if another blockdev or a LUN is using this device */
	bool claimed;
};
/**
* Function table for a block device backend.
*
* The backend block device function table provides a set of APIs to allow
* communication with a backend. The main commands are read/write API
* calls for I/O via submit_request.
*/
struct spdk_bdev_fn_table {
	/** Destroy the backend block device object. */
	int (*destruct)(struct spdk_bdev *bdev);

	/**
	 * Poll the backend for I/O waiting to be completed.
	 * Called from the bdev poller (spdk_bdev_do_work()).
	 */
	int (*check_io)(struct spdk_bdev *bdev);

	/** Process the IO by submitting it to the backend. */
	void (*submit_request)(struct spdk_bdev_io *);

	/** Release buf for read command. */
	void (*free_request)(struct spdk_bdev_io *);
};
/** Blockdev I/O type */
/** Blockdev I/O type */
enum spdk_bdev_io_type {
	SPDK_BDEV_IO_TYPE_INVALID,
	SPDK_BDEV_IO_TYPE_READ,
	SPDK_BDEV_IO_TYPE_WRITE,
	SPDK_BDEV_IO_TYPE_UNMAP,
	SPDK_BDEV_IO_TYPE_FLUSH,
	SPDK_BDEV_IO_TYPE_RESET,
};

/** Blockdev I/O completion status.  PENDING means not yet completed. */
enum spdk_bdev_io_status {
	SPDK_BDEV_IO_STATUS_FAILED = -1,
	SPDK_BDEV_IO_STATUS_PENDING = 0,
	SPDK_BDEV_IO_STATUS_SUCCESS = 1,
};

/** Blockdev reset operation type */
enum spdk_bdev_reset_type {
	/**
	 * A hard reset indicates that the blockdev layer should not
	 * invoke the completion callback for I/Os issued before the
	 * reset is issued but completed after the reset is complete.
	 */
	SPDK_BDEV_RESET_HARD,

	/**
	 * A soft reset indicates that the blockdev layer should still
	 * invoke the completion callback for I/Os issued before the
	 * reset is issued but completed after the reset is complete.
	 */
	SPDK_BDEV_RESET_SOFT,
};
typedef spdk_event_fn spdk_bdev_io_completion_cb;
typedef void (*spdk_bdev_io_get_rbuf_cb)(struct spdk_bdev_io *bdev_io);
/**
* Block device I/O
*
* This is an I/O that is passed to an spdk_bdev.
*/
struct spdk_bdev_io {
	/** Pointer to scratch area reserved for use by the driver consuming this spdk_bdev_io. */
	void *ctx;

	/** Generation value for each I/O (copied from the bdev at init time). */
	uint32_t gencnt;

	/** The block device that this I/O belongs to. */
	struct spdk_bdev *bdev;

	/** Enumerated value representing the I/O type. */
	enum spdk_bdev_io_type type;

	/** Per-type request parameters; only the member matching \ref type is valid. */
	union {
		struct {
			/** The unaligned rbuf originally allocated. */
			void *buf_unaligned;

			/** For single buffer cases, pointer to the aligned data buffer. */
			void *buf;

			/** For single buffer cases, size of the data buffer. */
			uint64_t nbytes;

			/** Starting offset (in bytes) of the blockdev for this I/O. */
			uint64_t offset;

			/** Whether the blockdev layer should release the rbuf when this I/O is freed. */
			bool put_rbuf;
		} read;
		struct {
			/** For basic write case, use our own iovec element */
			struct iovec iov;

			/** For SG buffer cases, array of iovecs to transfer. */
			struct iovec *iovs;

			/** For SG buffer cases, number of iovecs in iovec array. */
			int iovcnt;

			/** For SG buffer cases, total size of data to be transferred. */
			size_t len;

			/** Starting offset (in bytes) of the blockdev for this I/O. */
			uint64_t offset;
		} write;
		struct {
			/** Represents the unmap block descriptors. */
			struct spdk_scsi_unmap_bdesc *unmap_bdesc;

			/** Count of unmap block descriptors. */
			uint16_t bdesc_count;
		} unmap;
		struct {
			/** Represents starting offset in bytes of the range to be flushed. */
			uint64_t offset;

			/** Represents the number of bytes to be flushed, starting at offset. */
			uint64_t length;
		} flush;
		struct {
			/** Reset type — presumably an enum spdk_bdev_reset_type value; confirm with callers. */
			int32_t type;
		} reset;
	} u;

	/** User function that will be called when this completes */
	spdk_bdev_io_completion_cb cb;

	/** Context that will be passed to the completion callback */
	void *caller_ctx;

	/** Completion event, chained onto the submit event in spdk_bdev_io_submit(). */
	struct spdk_event *cb_event;

	/** Callback for when rbuf is allocated */
	spdk_bdev_io_get_rbuf_cb get_rbuf_cb;

	/** Status for the IO */
	enum spdk_bdev_io_status status;

	/** Used in virtual device (e.g., RAID), indicates its parent spdk_bdev_io */
	void *parent;

	/** Used in virtual device (e.g., RAID) for storing multiple child device I/Os */
	TAILQ_HEAD(child_io, spdk_bdev_io) child_io;

	/** Member used for linking child I/Os together. */
	TAILQ_ENTRY(spdk_bdev_io) link;

	/** Number of children for this I/O */
	int children;

	/** Entry to the list need_buf of struct spdk_bdev. */
	TAILQ_ENTRY(spdk_bdev_io) rbuf_link;

	/** Per I/O context for use by the blockdev module; sized at pool-creation
	 *  time to the largest get_ctx_size() of any registered module. */
	uint8_t driver_ctx[0];

	/* No members may be added after driver_ctx! */
};
/** Block device module */
/** Block device module */
struct spdk_bdev_module_if {
	/**
	 * Initialization function for the module.  Called by the spdk
	 * application during startup.
	 *
	 * Modules are required to define this function.
	 */
	int (*module_init)(void);

	/**
	 * Finish function for the module.  Called by the spdk application
	 * before the spdk application exits to perform any necessary cleanup.
	 *
	 * Modules are not required to define this function.
	 */
	void (*module_fini)(void);

	/**
	 * Function called to return a text string representing the
	 * module's configuration options for inclusion in a configuration file.
	 *
	 * Modules are not required to define this function (may be NULL).
	 */
	void (*config_text)(FILE *fp);

	/** Name for the modules being defined. */
	const char *module_name;

	/**
	 * Returns the allocation size required for the backend for uses such as local
	 * command structs, local SGL, iovecs, or other user context.
	 * Optional (may be NULL); the largest value over all modules sizes
	 * spdk_bdev_io::driver_ctx.
	 */
	int (*get_ctx_size)(void);

	/** Linkage into the global bdev/vbdev module list. */
	TAILQ_ENTRY(spdk_bdev_module_if) tailq;
};
/* The blockdev API has two distinct parts.  The first portion of the API
 * is to be used by the layer above the blockdev in order to communicate
 * with it.  The second portion of the API is to be used by the blockdev
 * modules themselves to perform operations like completing I/O.
 */

/* The following functions are intended to be called from the upper layer
 * that is using the blockdev layer.
 */

/** Submit a single-buffer read.  Returns the I/O handle, or NULL on invalid parameters. */
struct spdk_bdev_io *spdk_bdev_read(struct spdk_bdev *bdev,
				    void *buf, uint64_t nbytes, uint64_t offset,
				    spdk_bdev_io_completion_cb cb, void *cb_arg);

/** Submit a single-buffer write.  Returns the I/O handle, or NULL on invalid parameters. */
struct spdk_bdev_io *spdk_bdev_write(struct spdk_bdev *bdev,
				     void *buf, uint64_t nbytes, uint64_t offset,
				     spdk_bdev_io_completion_cb cb, void *cb_arg);

/** Submit a scatter-gather write.  Returns the I/O handle, or NULL on invalid parameters. */
struct spdk_bdev_io *spdk_bdev_writev(struct spdk_bdev *bdev,
				      struct iovec *iov, int iovcnt,
				      uint64_t len, uint64_t offset,
				      spdk_bdev_io_completion_cb cb, void *cb_arg);

/** Submit an unmap described by SCSI unmap block descriptors. */
struct spdk_bdev_io *spdk_bdev_unmap(struct spdk_bdev *bdev,
				     struct spdk_scsi_unmap_bdesc *unmap_d,
				     uint16_t bdesc_count,
				     spdk_bdev_io_completion_cb cb, void *cb_arg);

/** Submit a flush of the given byte range. */
struct spdk_bdev_io *spdk_bdev_flush(struct spdk_bdev *bdev,
				     uint64_t offset, uint64_t length,
				     spdk_bdev_io_completion_cb cb, void *cb_arg);

/** Queue an initialized I/O to its block device's poller core. */
int spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io);

/** Poller entry point; \p ctx is the struct spdk_bdev to service. */
void spdk_bdev_do_work(void *ctx);

/** Submit a reset; \p reset_type is one of enum spdk_bdev_reset_type. */
int spdk_bdev_reset(struct spdk_bdev *bdev, int reset_type,
		    spdk_bdev_io_completion_cb cb, void *cb_arg);

/* The remaining functions are intended to be called from within
 * blockdev modules.
 */

/** Register a block device with the bdev layer. */
void spdk_bdev_register(struct spdk_bdev *bdev);

/** Unregister a previously registered block device. */
void spdk_bdev_unregister(struct spdk_bdev *bdev);

/** Release an I/O back to the layer. */
int spdk_bdev_free_io(struct spdk_bdev_io *bdev_io);

/** Obtain a rental read buffer for \p bdev_io; \p cb fires once a buffer is assigned. */
void spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_rbuf_cb cb);

/** Allocate an I/O from the global pool (panics on exhaustion). */
struct spdk_bdev_io *spdk_bdev_get_io(void);

/** Allocate a child I/O linked to \p parent (used by virtual bdevs such as RAID). */
struct spdk_bdev_io *spdk_bdev_get_child_io(struct spdk_bdev_io *parent,
					    struct spdk_bdev *bdev,
					    spdk_bdev_io_completion_cb cb,
					    void *cb_arg);

/** Mark an I/O complete with the given status. */
void spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io,
			   enum spdk_bdev_io_status status);

/** Add a module to the physical bdev list (used by SPDK_BDEV_MODULE_REGISTER). */
void spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module);

/** Add a module to the virtual bdev list (used by SPDK_VBDEV_MODULE_REGISTER). */
void spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module);
static inline struct spdk_bdev_io *
spdk_bdev_io_from_ctx(void *ctx)
{
return (struct spdk_bdev_io *)
((uintptr_t)ctx - offsetof(struct spdk_bdev_io, driver_ctx));
}
/*
 * Register a physical block device backend module.
 *
 * Declares a struct spdk_bdev_module_if named <init_fn>_if and adds it to
 * the physical module list from a constructor that runs before main().
 */
#define SPDK_BDEV_MODULE_REGISTER(init_fn, fini_fn, config_fn, ctx_size_fn) \
	static struct spdk_bdev_module_if init_fn ## _if = { \
		.module_init = init_fn, \
		.module_fini = fini_fn, \
		.config_text = config_fn, \
		.get_ctx_size = ctx_size_fn, \
	}; \
	__attribute__((constructor)) static void init_fn ## _init(void) \
	{ \
		spdk_bdev_module_list_add(&init_fn ## _if); \
	}

/*
 * Register a virtual block device backend module (RAID, partitioning, ...).
 * Identical to SPDK_BDEV_MODULE_REGISTER except the module lands on the
 * virtual module list.
 */
#define SPDK_VBDEV_MODULE_REGISTER(init_fn, fini_fn, config_fn, ctx_size_fn) \
	static struct spdk_bdev_module_if init_fn ## _if = { \
		.module_init = init_fn, \
		.module_fini = fini_fn, \
		.config_text = config_fn, \
		.get_ctx_size = ctx_size_fn, \
	}; \
	__attribute__((constructor)) static void init_fn ## _init(void) \
	{ \
		spdk_vbdev_module_list_add(&init_fn ## _if); \
	}
#endif /* SPDK_BDEV_H_ */

57
include/spdk/bdev_db.h Normal file
View File

@ -0,0 +1,57 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* Block device database
*/
#ifndef SPDK_BDEV_DB_H_
#define SPDK_BDEV_DB_H_

#ifdef __cplusplus
extern "C" {
#endif

struct spdk_bdev;

/** Add a block device to the global bdev database.
 *  Return semantics defined in bdev_db.c — presumably 0 on success; verify. */
int spdk_bdev_db_add(struct spdk_bdev *bdev);

/** Remove a block device from the global bdev database. */
int spdk_bdev_db_delete(struct spdk_bdev *bdev);

/** Look up a registered block device by name. */
struct spdk_bdev *spdk_bdev_db_get_by_name(const char *bdev_name);

#ifdef __cplusplus
}
#endif

#endif

View File

@ -40,6 +40,11 @@
#define SPDK_SCSI_SPEC_H
#include <stdint.h>
#ifdef __linux__
#include <endian.h>
#elif defined(__FreeBSD__)
#include <sys/endian.h>
#endif
#include "spdk/assert.h"

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y += conf copy cunit event json jsonrpc log memory rpc trace util nvme nvmf ioat
DIRS-y += bdev conf copy cunit event json jsonrpc log memory rpc trace util nvme nvmf ioat
.PHONY: all clean $(DIRS-y)

43
lib/bdev/Makefile Normal file
View File

@ -0,0 +1,43 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
CFLAGS += $(DPDK_INC)
C_SRCS = bdev.c bdev_db.c
LIBNAME = bdev
DIRS-y += malloc nvme
include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

807
lib/bdev/bdev.c Normal file
View File

@ -0,0 +1,807 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/bdev.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <rte_config.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_version.h>
#include "spdk/bdev_db.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/queue.h"
/* Number of spdk_bdev_io structures preallocated in the global I/O pool. */
#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)

/* Number of buffers in the small/large rental read buffer (rbuf) pools. */
#define RBUF_SMALL_POOL_SIZE	8192
#define RBUF_LARGE_POOL_SIZE	1024

/* Pool that every spdk_bdev_io (plus its driver_ctx tail) is allocated from. */
static struct rte_mempool *spdk_bdev_g_io_pool = NULL;

/* Rental read buffer pools; see spdk_initialize_rbuf_pool(). */
static struct rte_mempool *g_rbuf_small_pool = NULL;
static struct rte_mempool *g_rbuf_large_pool = NULL;

typedef TAILQ_HEAD(, spdk_bdev_io) need_rbuf_tailq_t;

/* Per-lcore queues of read I/Os waiting for a rental buffer to free up. */
static need_rbuf_tailq_t g_need_rbuf_small[RTE_MAX_LCORE];
static need_rbuf_tailq_t g_need_rbuf_large[RTE_MAX_LCORE];

/* Registered backend module lists: physical bdevs and virtual bdevs. */
static TAILQ_HEAD(, spdk_bdev_module_if) spdk_bdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_bdev_module_list);
static TAILQ_HEAD(, spdk_bdev_module_if) spdk_vbdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_vbdev_module_list);
/* Assign a rental buffer to a read I/O and invoke its get_rbuf callback. */
static void
spdk_bdev_io_set_rbuf(struct spdk_bdev_io *bdev_io, void *buf)
{
	RTE_VERIFY(bdev_io->get_rbuf_cb != NULL);
	RTE_VERIFY(buf != NULL);

	/*
	 * Keep the raw pointer for returning to the mempool later, and hand the
	 * I/O a 512-byte-aligned view: (buf + 512) & ~511 rounds up to the next
	 * 512-byte boundary.  The pools allocate 512 extra bytes per element to
	 * leave room for this adjustment.
	 */
	bdev_io->u.read.buf_unaligned = buf;
	bdev_io->u.read.buf = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.put_rbuf = true;
	bdev_io->get_rbuf_cb(bdev_io);
}
/* Release a read I/O's rental buffer, recycling it directly to any waiter. */
static void
spdk_bdev_io_put_rbuf(struct spdk_bdev_io *bdev_io)
{
	struct rte_mempool *pool;
	void *buf;
	need_rbuf_tailq_t *tailq;
	uint64_t length;

	length = bdev_io->u.read.nbytes;
	buf = bdev_io->u.read.buf_unaligned;

	/* Select the pool/wait-queue pair matching the buffer's size class. */
	if (length <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) {
		pool = g_rbuf_small_pool;
		tailq = &g_need_rbuf_small[rte_lcore_id()];
	} else {
		pool = g_rbuf_large_pool;
		tailq = &g_need_rbuf_large[rte_lcore_id()];
	}

	/*
	 * If another I/O on this core is waiting for a buffer, hand this one
	 * over directly instead of round-tripping through the mempool.
	 */
	if (TAILQ_EMPTY(tailq)) {
		rte_mempool_put(pool, buf);
	} else {
		/* Note: bdev_io is reused here to refer to the waiting I/O. */
		bdev_io = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, bdev_io, rbuf_link);
		spdk_bdev_io_set_rbuf(bdev_io, buf);
	}
}
/* Create the small and large rental read buffer pools.  Returns 0 on success. */
static int spdk_initialize_rbuf_pool(void)
{
	int cache_size;

	/**
	 * Ensure no more than half of the total buffers end up in per-core local
	 * caches, by using spdk_app_get_core_count() to determine how many local
	 * caches we need to account for.
	 */
	cache_size = RBUF_SMALL_POOL_SIZE / (2 * spdk_app_get_core_count());
	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE)
		cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
	/* The +512 reserves slack for the 512-byte alignment adjustment
	 * performed in spdk_bdev_io_set_rbuf(). */
	g_rbuf_small_pool = rte_mempool_create("rbuf_small_pool",
					       RBUF_SMALL_POOL_SIZE,
					       SPDK_BDEV_SMALL_RBUF_MAX_SIZE + 512,
					       cache_size, 0, NULL, NULL, NULL, NULL,
					       SOCKET_ID_ANY, 0);
	if (!g_rbuf_small_pool) {
		SPDK_ERRLOG("create rbuf small pool failed\n");
		return -1;
	}

	cache_size = RBUF_LARGE_POOL_SIZE / (2 * spdk_app_get_core_count());
	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE)
		cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
	g_rbuf_large_pool = rte_mempool_create("rbuf_large_pool",
					       RBUF_LARGE_POOL_SIZE,
					       SPDK_BDEV_LARGE_RBUF_MAX_SIZE + 512,
					       cache_size, 0, NULL, NULL, NULL, NULL,
					       SOCKET_ID_ANY, 0);
	if (!g_rbuf_large_pool) {
		/* NOTE(review): the small pool is not torn down here; old DPDK
		 * has no rte_mempool_free, so failure is effectively fatal. */
		SPDK_ERRLOG("create rbuf large pool failed\n");
		return -1;
	}

	return 0;
}
/*
 * Return the largest per-I/O context size requested by any registered
 * bdev or vbdev module.  The spdk_bdev_io pool reserves this much extra
 * space after each element for the module's driver_ctx.
 *
 * Each module's get_ctx_size() hook is invoked exactly once (the original
 * called it twice per module — once in the comparison and once for the
 * assignment — which is wasteful and fragile if a hook is not idempotent).
 */
static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;
	int ctx_size;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->get_ctx_size) {
			ctx_size = bdev_module->get_ctx_size();
			if (ctx_size > max_bdev_module_size) {
				max_bdev_module_size = ctx_size;
			}
		}
	}

	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->get_ctx_size) {
			ctx_size = bdev_module->get_ctx_size();
			if (ctx_size > max_bdev_module_size) {
				max_bdev_module_size = ctx_size;
			}
		}
	}

	return max_bdev_module_size;
}
/* Run module_init() for every registered module; physical bdev modules
 * first, then virtual ones.  Stops and returns the first nonzero result. */
static int
spdk_bdev_module_initialize(void)
{
	struct spdk_bdev_module_if *m;
	int rc;

	TAILQ_FOREACH(m, &spdk_bdev_module_list, tailq) {
		rc = m->module_init();
		if (rc != 0) {
			return rc;
		}
	}

	TAILQ_FOREACH(m, &spdk_vbdev_module_list, tailq) {
		rc = m->module_init();
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}
/* Run module_fini() (when provided) for every registered module, in the
 * reverse order of initialization: virtual modules first, then physical. */
static void
spdk_bdev_module_finish(void)
{
	struct spdk_bdev_module_if *m;

	TAILQ_FOREACH(m, &spdk_vbdev_module_list, tailq) {
		if (m->module_fini == NULL) {
			continue;
		}
		m->module_fini();
	}

	TAILQ_FOREACH(m, &spdk_bdev_module_list, tailq) {
		if (m->module_fini == NULL) {
			continue;
		}
		m->module_fini();
	}
}
/* Emit each module's configuration-file text to fp; physical bdev modules
 * first, then virtual ones.  Modules without a config_text hook are skipped. */
static void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *m;

	TAILQ_FOREACH(m, &spdk_bdev_module_list, tailq) {
		if (m->config_text == NULL) {
			continue;
		}
		m->config_text(fp);
	}

	TAILQ_FOREACH(m, &spdk_vbdev_module_list, tailq) {
		if (m->config_text == NULL) {
			continue;
		}
		m->config_text(fp);
	}
}
/*
 * Initialize the bdev layer: run all module init hooks, create the global
 * spdk_bdev_io pool, set up the per-core rbuf wait queues, and create the
 * rbuf pools.  Returns 0 on success, -1 on failure.
 *
 * Fix: both SPDK_ERRLOG messages were missing their trailing '\n',
 * inconsistent with every other log line in this file.
 */
static int
spdk_bdev_initialize(void)
{
	int i;

	if (spdk_bdev_module_initialize()) {
		SPDK_ERRLOG("bdev module initialize failed\n");
		return -1;
	}

	/*
	 * Element size reserves room after struct spdk_bdev_io for the largest
	 * per-I/O driver context any registered module requires.
	 */
	spdk_bdev_g_io_pool = rte_mempool_create("blockdev_io",
			      SPDK_BDEV_IO_POOL_SIZE,
			      sizeof(struct spdk_bdev_io) +
			      spdk_bdev_module_get_max_ctx_size(),
			      64, 0,
			      NULL, NULL, NULL, NULL,
			      SOCKET_ID_ANY, 0);

	if (spdk_bdev_g_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		return -1;
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		TAILQ_INIT(&g_need_rbuf_small[i]);
		TAILQ_INIT(&g_need_rbuf_large[i]);
	}

	return spdk_initialize_rbuf_pool();
}
/*
 * Wrapper to provide rte_mempool_avail_count() on older DPDK versions.
 * Drop this if the minimum DPDK version is raised to at least 16.07.
 * (On 16.07+ the real library symbol is used and this static shim is
 * compiled out.)
 */
#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
static unsigned rte_mempool_avail_count(const struct rte_mempool *pool)
{
	return rte_mempool_count(pool);
}
#endif
/*
 * Verify that all of a pool's buffers have been returned; a mismatch at
 * shutdown indicates leaked or still-outstanding buffers.
 * Returns 0 when the pool is full, -1 otherwise.
 *
 * Fix: the log format used %d for unsigned values — rte_mempool_avail_count()
 * returns unsigned and count is uint32_t — which is undefined per the C
 * printf rules; use %u for both.
 */
static int
spdk_bdev_check_pool(struct rte_mempool *pool, uint32_t count)
{
	if (rte_mempool_avail_count(pool) != count) {
		SPDK_ERRLOG("rte_mempool_avail_count(%s) == %u, should be %u\n",
			    pool->name, rte_mempool_avail_count(pool), count);
		return -1;
	} else {
		return 0;
	}
}
/* Tear down the bdev layer: run module fini hooks, then sanity-check that
 * every rbuf has been returned.  Returns 0 on success, 1 on leak. */
static int
spdk_bdev_finish(void)
{
	int rc = 0;

	spdk_bdev_module_finish();

	/* Each check contributes -1 if buffers are missing from the pool. */
	rc += spdk_bdev_check_pool(g_rbuf_small_pool, RBUF_SMALL_POOL_SIZE);
	rc += spdk_bdev_check_pool(g_rbuf_large_pool, RBUF_LARGE_POOL_SIZE);

	/* Collapse the accumulated result to 0 (clean) or 1 (leak detected).
	 * Note the pools themselves are not freed here. */
	return (rc != 0);
}
/* Allocate an spdk_bdev_io from the global pool.  Never returns NULL:
 * pool exhaustion is treated as fatal via rte_panic, so the NULL checks at
 * call sites are effectively dead code. */
struct spdk_bdev_io *spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	rc = rte_mempool_get(spdk_bdev_g_io_pool, (void **)&bdev_io);
	if (rc < 0 || !bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		rte_panic("no memory\n");
	}

	/* Zero only the base struct; the trailing driver_ctx area is left
	 * uninitialized for the module. */
	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}
static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
if (!bdev_io) {
return;
}
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && bdev_io->u.read.put_rbuf) {
spdk_bdev_io_put_rbuf(bdev_io);
}
rte_mempool_put(spdk_bdev_g_io_pool, bdev_io);
}
/* Try to attach a rental buffer to a read I/O.  If the matching pool is
 * empty, park the I/O on this core's wait queue; it will be served when a
 * buffer is released in spdk_bdev_io_put_rbuf(). */
static void
_spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io)
{
	uint64_t len = bdev_io->u.read.nbytes;
	struct rte_mempool *pool;
	need_rbuf_tailq_t *tailq;
	int rc;
	void *buf = NULL;

	/* Select the pool/wait-queue pair matching the read's size class. */
	if (len <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) {
		pool = g_rbuf_small_pool;
		tailq = &g_need_rbuf_small[rte_lcore_id()];
	} else {
		pool = g_rbuf_large_pool;
		tailq = &g_need_rbuf_large[rte_lcore_id()];
	}

	rc = rte_mempool_get(pool, (void **)&buf);
	if (rc < 0 || !buf) {
		/* No buffer available right now — queue and wait. */
		TAILQ_INSERT_TAIL(tailq, bdev_io, rbuf_link);
	} else {
		spdk_bdev_io_set_rbuf(bdev_io, buf);
	}
}
static void
spdk_bdev_cleanup_pending_rbuf_io(struct spdk_bdev *bdev)
{
struct spdk_bdev_io *bdev_io, *tmp;
TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_small[rte_lcore_id()], rbuf_link, tmp) {
if (bdev_io->bdev == bdev) {
TAILQ_REMOVE(&g_need_rbuf_small[rte_lcore_id()], bdev_io, rbuf_link);
bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
}
}
TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_large[rte_lcore_id()], rbuf_link, tmp) {
if (bdev_io->bdev == bdev) {
TAILQ_REMOVE(&g_need_rbuf_large[rte_lcore_id()], bdev_io, rbuf_link);
bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
}
}
}
/* Let the backend release its per-request resources, then return the I/O
 * (and any rental read buffer) to the global pool. */
static void
spdk_bdev_io_free_request(struct spdk_bdev_io *bdev_io)
{
	bdev_io->bdev->fn_table->free_request(bdev_io);
	spdk_bdev_put_io(bdev_io);
}
/* Event handler that runs on the bdev's poller core and hands the I/O to
 * the backend.
 * NOTE(review): the leading double underscore makes this a reserved
 * identifier in C; consider renaming (e.g. _submit_request). */
static void
__submit_request(spdk_event_t event)
{
	struct spdk_bdev *bdev = spdk_event_get_arg1(event);
	struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event);

	/* Stash the chained completion event allocated by
	 * spdk_bdev_io_submit(); presumably fired when the I/O completes —
	 * confirm in the completion path. */
	bdev_io->cb_event = spdk_event_get_next(event);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		/* A reset first fails all I/O on this core still waiting for
		 * rental buffers, then goes to the backend. */
		if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
			spdk_bdev_cleanup_pending_rbuf_io(bdev);
		}
		bdev->fn_table->submit_request(bdev_io);
	} else {
		/* Already failed (e.g. by a reset) before reaching the device. */
		spdk_bdev_io_free_request(bdev_io);
	}
}
/* Poller callback: ctx is the struct spdk_bdev registered in
 * spdk_bdev_io_submit(). */
void
spdk_bdev_do_work(void *ctx)
{
	struct spdk_bdev *bdev = ctx;

	/* NOTE(review): check_io is declared in spdk/bdev.h as taking a
	 * struct spdk_bdev *, but bdev->ctxt (a void *) is passed here.
	 * Either the prototype or this call site is wrong — confirm what
	 * backend modules actually expect before changing either. */
	bdev->fn_table->check_io(bdev->ctxt);
}
/* Queue an initialized I/O to its bdev's poller core.  Always returns 0;
 * event-allocation failures panic via RTE_VERIFY. */
int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_event *event, *cb_event = NULL;
	uint32_t lcore = bdev->poller.lcore;

	/* start the poller when first IO comes */
	if (!bdev->is_running) {
		bdev->is_running = true;
		/* NOTE(review): lcore 0 is treated as "no core assigned", but 0
		 * is also a valid lcore id — confirm this sentinel is safe. */
		if (lcore == 0) {
			lcore = rte_lcore_id();
		}
		spdk_poller_register(&bdev->poller, lcore, NULL);
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		/* Allocate the completion event on the submitting core so the
		 * caller's callback runs back here. */
		cb_event = spdk_event_allocate(rte_lcore_id(), bdev_io->cb,
					       bdev_io->caller_ctx, bdev_io, NULL);
		RTE_VERIFY(cb_event != NULL);
	}

	/* Dispatch to the poller core; cb_event rides along as the chained
	 * "next" event and is captured in __submit_request(). */
	event = spdk_event_allocate(lcore, __submit_request, bdev, bdev_io, cb_event);
	RTE_VERIFY(event != NULL);
	spdk_event_call(event);

	return 0;
}
/* Fill in the common fields of a freshly allocated I/O: owning bdev,
 * completion callback, current bdev generation, and an empty child list. */
static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	/* The driver-visible ctx is the bdev's user context. */
	bdev_io->ctx = bdev->ctxt;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	/* Snapshot the reset generation so stale completions can be detected. */
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->children = 0;
	TAILQ_INIT(&bdev_io->child_io);
}
/* Allocate a child I/O cloned from \p parent and targeted at \p bdev,
 * linking it into the parent's child list (used by virtual bdevs such as
 * RAID).  Returns the child, or NULL on allocation failure — though
 * spdk_bdev_get_io() currently panics instead of returning NULL. */
struct spdk_bdev_io *
spdk_bdev_get_child_io(struct spdk_bdev_io *parent,
		       struct spdk_bdev *bdev,
		       spdk_bdev_io_completion_cb cb,
		       void *cb_arg)
{
	struct spdk_bdev_io *child;

	child = spdk_bdev_get_io();
	if (!child) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		return NULL;
	}

	if (cb_arg == NULL) {
		cb_arg = child;
	}

	spdk_bdev_io_init(child, bdev, cb_arg, cb);

	child->type = parent->type;
	/* Clone the parent's per-type parameters wholesale. */
	memcpy(&child->u, &parent->u, sizeof(child->u));
	if (child->type == SPDK_BDEV_IO_TYPE_READ) {
		/* The parent retains ownership of the rental buffer; prevent a
		 * double release when the child is freed. */
		child->u.read.put_rbuf = false;
	}
	child->get_rbuf_cb = NULL;
	child->parent = parent;

	TAILQ_INSERT_TAIL(&parent->child_io, child, link);
	parent->children++;

	return child;
}
/*
 * Submit a single-buffer read of \p nbytes at byte \p offset.
 * Returns the I/O handle, or NULL if the parameters are invalid or
 * submission fails.  \p cb is invoked on completion with \p cb_arg.
 *
 * Fix: corrected "duing" -> "during" in the error log message.
 */
struct spdk_bdev_io *
spdk_bdev_read(struct spdk_bdev *bdev,
	       void *buf, uint64_t nbytes, uint64_t offset,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return NULL;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if ((offset + nbytes) < offset) {
		return NULL;
	}

	/* Return failure if offset + nbytes exceeds the size of the blockdev */
	if ((offset + nbytes) > (bdev->blockcnt * bdev->blocklen)) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return NULL;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.buf = buf;
	bdev_io->u.read.nbytes = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
/*
 * Submit a single-buffer write of \p nbytes at byte \p offset.  The buffer
 * is wrapped in the I/O's embedded iovec so the backend sees a one-element
 * scatter list.  Returns the I/O handle, or NULL if the parameters are
 * invalid or submission fails.
 *
 * Fix: the error message said "duing writev" in the write (not writev)
 * path — corrected to "during write".
 */
struct spdk_bdev_io *
spdk_bdev_write(struct spdk_bdev *bdev,
		void *buf, uint64_t nbytes, uint64_t offset,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return NULL;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if ((offset + nbytes) < offset) {
		return NULL;
	}

	/* Return failure if offset + nbytes exceeds the size of the blockdev */
	if ((offset + nbytes) > (bdev->blockcnt * bdev->blocklen)) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("blockdev_io memory allocation failed during write\n");
		return NULL;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
/*
 * Submit a scatter-gather write of \p len bytes at byte \p offset from the
 * \p iovcnt iovecs in \p iov.  The iovec array must remain valid until the
 * I/O completes.  Returns the I/O handle, or NULL if the parameters are
 * invalid or submission fails.
 *
 * Fixes: "duing" -> "during" in the error message; overflow-check comment
 * referred to "nbytes" although the variable here is len.
 */
struct spdk_bdev_io *
spdk_bdev_writev(struct spdk_bdev *bdev,
		 struct iovec *iov, int iovcnt,
		 uint64_t len, uint64_t offset,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	/* Return failure if len is not a multiple of bdev->blocklen */
	if (len % bdev->blocklen) {
		return NULL;
	}

	/* Return failure if offset + len is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if ((offset + len) < offset) {
		return NULL;
	}

	/* Return failure if offset + len exceeds the size of the blockdev */
	if ((offset + len) > (bdev->blockcnt * bdev->blocklen)) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return NULL;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
/*
 * Submit an unmap (deallocate/trim) request described by SCSI unmap
 * block descriptors.
 *
 * \param bdev        Target block device.
 * \param unmap_d     Array of bdesc_count unmap block descriptors.
 * \param bdesc_count Number of descriptors in unmap_d.
 * \param cb          Completion callback.
 * \param cb_arg      Opaque argument passed through to cb.
 * \return I/O handle on success; NULL on allocation/submission failure.
 */
struct spdk_bdev_io *
spdk_bdev_unmap(struct spdk_bdev *bdev,
		struct spdk_scsi_unmap_bdesc *unmap_d,
		uint16_t bdesc_count,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		/* Fixed typo: "duing" -> "during". */
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return NULL;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.unmap_bdesc = unmap_d;
	bdev_io->u.unmap.bdesc_count = bdesc_count;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
/*
 * Submit a flush for a byte range of the block device.
 *
 * \param bdev   Target block device.
 * \param offset Starting byte offset of the range to flush.
 * \param length Length of the range in bytes.
 * \param cb     Completion callback.
 * \param cb_arg Opaque argument passed through to cb.
 * \return I/O handle on success; NULL on allocation/submission failure.
 */
struct spdk_bdev_io *
spdk_bdev_flush(struct spdk_bdev *bdev,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		/* Fixed typo: "duing" -> "during". */
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return NULL;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
/*
 * Submit a reset request to the block device.
 *
 * \param bdev       Target block device.
 * \param reset_type Reset flavor (e.g. SPDK_BDEV_RESET_HARD).
 * \param cb         Completion callback.
 * \param cb_arg     Opaque argument passed through to cb.
 * \return 0 on successful submission, negative on failure
 *         (note: -1 on allocation failure, submit rc otherwise).
 */
int
spdk_bdev_reset(struct spdk_bdev *bdev, int reset_type,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		/* Fixed typo: "duing" -> "during". */
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -1;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	bdev_io->u.reset.type = reset_type;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
	}

	return rc;
}
/*
 * Release a completed bdev I/O.
 *
 * Fails with -1 if bdev_io is NULL or is still in the PENDING state
 * (an in-flight I/O must complete before it can be freed).
 *
 * NOTE(review): this path re-submits the I/O via spdk_bdev_io_submit()
 * rather than calling a dedicated free routine; presumably the submit
 * path routes a non-pending I/O to the driver's free_request handler —
 * confirm against spdk_bdev_io_submit()'s implementation (not visible
 * in this chunk).
 */
int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	int rc;

	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		return -1;
	}

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("free_request failure\n");
	}

	return rc;
}
/*
 * Complete a bdev I/O and schedule its completion callback event.
 *
 * Reset I/Os: a successful hard reset bumps the device's reset
 * generation counter (gencnt), invalidating every I/O issued before
 * the reset.  Other I/Os: if the I/O's recorded gencnt differs from
 * the device's current gencnt, the I/O predates the last hard reset
 * and is freed silently (its submitter already abandoned the context).
 *
 * NOTE(review): the success check below reads bdev_io->status BEFORE
 * the incoming `status` is assigned to it at the bottom of this
 * function; it looks like it should test the `status` parameter
 * instead — confirm intended ordering.
 */
void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the blockdev generation if it is a hard reset */
			if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) {
				bdev_io->bdev->gencnt++;
			}
		}
	} else {
		/*
		 * Check the gencnt, to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	bdev_io->status = status;
	RTE_VERIFY(bdev_io->cb_event != NULL);
	/* The completion callback runs asynchronously via the event framework. */
	spdk_event_call(bdev_io->cb_event);
}
/*
 * Make a block device visible to the rest of the system: reset its
 * generation counter, wire up its poller callback, and insert it into
 * the global device database.
 */
void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	/* A freshly registered device starts at reset generation zero
	 * and with its poller not yet running. */
	bdev->gencnt = 0;
	bdev->is_running = false;

	/* The poller invokes spdk_bdev_do_work() with this bdev as context. */
	bdev->poller.fn = spdk_bdev_do_work;
	bdev->poller.arg = bdev;

	spdk_bdev_db_add(bdev);
}
/*
 * Remove a block device from the system.
 *
 * The poller is stopped BEFORE fn_table->destruct() is called: the
 * destruct callback may free the bdev itself (the malloc backend
 * rte_free()s the containing disk), so reading bdev->is_running or
 * bdev->poller after destruct would be a use-after-free.
 */
void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	int rc;

	spdk_bdev_db_delete(bdev);

	if (bdev->is_running) {
		spdk_poller_unregister(&bdev->poller, NULL);
		bdev->is_running = false;
	}

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}
/*
 * Ensure a read I/O has a data buffer, then invoke cb.
 * If a buffer is already attached, cb runs immediately; otherwise the
 * callback is recorded and buffer allocation is kicked off.
 */
void
spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_rbuf_cb cb)
{
	RTE_VERIFY(cb != NULL);

	if (bdev_io->u.read.buf != NULL) {
		/* Buffer already present - hand the I/O straight to the callback. */
		cb(bdev_io);
		return;
	}

	bdev_io->get_rbuf_cb = cb;
	_spdk_bdev_io_get_rbuf(bdev_io);
}
/* Register a bdev driver module; called via SPDK_BDEV_MODULE_REGISTER. */
void spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_bdev_module_list, bdev_module, tailq);
}
/* Register a virtual bdev module; kept on a separate list from base drivers. */
void spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_vbdev_module_list, vbdev_module, tailq);
}
SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text)
SPDK_SUBSYSTEM_DEPEND(bdev, copy)

105
lib/bdev/bdev_db.c Normal file
View File

@ -0,0 +1,105 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/bdev_db.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* strncmp() in spdk_bdev_db_get_by_name() */

#include "spdk/bdev.h"
#include "spdk/log.h"
/* One node in the global singly linked list of registered block devices. */
struct spdk_db_entry {
	struct spdk_bdev *bdev;		/* the registered device */
	int claimed;			/* times handed out by get_by_name() */
	struct spdk_db_entry *next;	/* next entry, or NULL at end of list */
};

/* Head of the device registry; new entries are pushed at the head. */
static struct spdk_db_entry *bdev_list_head = NULL;
/*
 * Add a bdev to the global registry (pushed onto the list head).
 * Returns 0 on success, -ENOMEM if the entry cannot be allocated.
 */
int spdk_bdev_db_add(struct spdk_bdev *bdev)
{
	struct spdk_db_entry *entry;

	entry = calloc(1, sizeof(*entry));
	if (entry == NULL) {
		SPDK_ERRLOG("Failed to allocate DB entry\n");
		return -ENOMEM;
	}

	entry->bdev = bdev;
	entry->next = bdev_list_head;
	bdev_list_head = entry;

	return 0;
}
/*
 * Remove a bdev from the global registry (no-op if it is not present).
 * Always returns 0.
 */
int spdk_bdev_db_delete(struct spdk_bdev *bdev)
{
	struct spdk_db_entry **link = &bdev_list_head;

	/* Walk the link pointers themselves so head and interior removal
	 * share a single unlink path. */
	while (*link != NULL) {
		struct spdk_db_entry *entry = *link;

		if (entry->bdev == bdev) {
			*link = entry->next;
			free(entry);
			break;
		}
		link = &entry->next;
	}

	return 0;
}
/*
 * Look up a registered bdev by name.
 * A successful lookup increments the entry's claim count.
 * Returns the device, or NULL if no name matches.
 */
struct spdk_bdev *spdk_bdev_db_get_by_name(const char *bdev_name)
{
	struct spdk_db_entry *entry;

	for (entry = bdev_list_head; entry != NULL; entry = entry->next) {
		struct spdk_bdev *bdev = entry->bdev;

		if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) {
			/* Lookups count as claims on the device. */
			entry->claimed++;
			return bdev;
		}
	}

	return NULL;
}

41
lib/bdev/malloc/Makefile Normal file
View File

@ -0,0 +1,41 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Standard SPDK library build: pull in the common definitions, list the
# sources, and let spdk.lib.mk produce the bdev_malloc library.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

# DPDK headers are needed for rte_malloc/rte_memcpy.
CFLAGS += $(DPDK_INC)

C_SRCS = blockdev_malloc.c blockdev_malloc_rpc.c
LIBNAME = bdev_malloc

include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

View File

@ -0,0 +1,360 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <errno.h>
#include <rte_config.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include "blockdev_malloc.h"
#include "spdk/bdev.h"
#include "spdk/conf.h"
#include "spdk/log.h"
#include "spdk/copy_engine.h"
/* A RAM-backed block device.  The embedded spdk_bdev must remain the
 * first member so a generic bdev pointer can be cast back to malloc_disk. */
struct malloc_disk {
	struct spdk_bdev disk; /* this must be the first element */
	void *malloc_buf;		/* backing data buffer (rte_zmalloc, hugepages) */
	struct malloc_disk *next;	/* singly linked list of all malloc disks */
};
/*
 * Copy engine completion callback: translate the copy status into a
 * bdev I/O status and complete the owning bdev I/O.
 */
static void
malloc_done(void *ref, int status)
{
	struct copy_task *task = ref;

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
			      status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}
/* Head of the list of all created malloc disks. */
static struct malloc_disk *g_malloc_disk_head = NULL;

/* Running count, used to name disks Malloc0, Malloc1, ... */
int malloc_disk_count = 0;

static int blockdev_malloc_initialize(void);
static void blockdev_malloc_finish(void);
static void blockdev_malloc_get_spdk_running_config(FILE *fp);

/* Per-I/O driver context must be able to hold a copy engine task. */
static int
blockdev_malloc_get_ctx_size(void)
{
	return spdk_copy_module_get_max_ctx_size();
}

SPDK_BDEV_MODULE_REGISTER(blockdev_malloc_initialize, blockdev_malloc_finish,
			  blockdev_malloc_get_spdk_running_config, blockdev_malloc_get_ctx_size)
/*
 * Unlink a malloc disk from the global list.  Safe to call with NULL
 * or with a disk that is not on the list (both are no-ops).
 */
static void
blockdev_malloc_delete_from_list(struct malloc_disk *malloc_disk)
{
	struct malloc_disk **link;

	if (malloc_disk == NULL) {
		return;
	}

	/* Walk the next-pointers so removing the head needs no special case. */
	for (link = &g_malloc_disk_head; *link != NULL; link = &(*link)->next) {
		if (*link == malloc_disk) {
			*link = malloc_disk->next;
			break;
		}
	}
}
/*
 * Tear down a malloc disk: unlink it from the global list and release
 * both the backing buffer and the disk structure.  Always returns 0.
 * The cast is valid because spdk_bdev is the first member of malloc_disk.
 */
static int
blockdev_malloc_destruct(struct spdk_bdev *bdev)
{
	struct malloc_disk *mdisk = (struct malloc_disk *)bdev;

	blockdev_malloc_delete_from_list(mdisk);
	rte_free(mdisk->malloc_buf);
	rte_free(mdisk);

	return 0;
}
/* Service a read by copying out of the backing buffer through the copy
 * engine; malloc_done() completes the bdev I/O when the copy finishes.
 * Returns the copy engine's submit status. */
static int64_t
blockdev_malloc_read(struct malloc_disk *mdisk, struct copy_task *copy_req,
		     void *buf, uint64_t nbytes, off_t offset)
{
	SPDK_TRACELOG(SPDK_TRACE_MALLOC, "read %lu bytes from offset %#lx to %p\n",
		      nbytes, offset, buf);

	return spdk_copy_submit(copy_req, buf, mdisk->malloc_buf + offset,
				nbytes, malloc_done);
}
/* Service a vectored write by copying into the backing buffer.
 * Only single-element iovecs are supported; anything else fails (-1).
 * NOTE: the trace fires at submit time, before the copy completes. */
static int64_t
blockdev_malloc_writev(struct malloc_disk *mdisk, struct copy_task *copy_req,
		       struct iovec *iov, int iovcnt, size_t len, off_t offset)
{
	if ((iovcnt != 1) || (iov->iov_len != len))
		return -1;

	SPDK_TRACELOG(SPDK_TRACE_MALLOC, "wrote %lu bytes to offset %#lx from %p\n",
		      iov->iov_len, offset, iov->iov_base);

	return spdk_copy_submit(copy_req, mdisk->malloc_buf + offset,
				iov->iov_base, len, malloc_done);
}
/* Poll the copy engine for completed copy operations. */
static int
blockdev_malloc_check_io(struct spdk_bdev *bdev)
{
	return spdk_copy_check_io();
}
/* A RAM disk has no volatile cache, so flush completes immediately. */
static int64_t
blockdev_malloc_flush(struct malloc_disk *mdisk, struct copy_task *copy_req,
		      uint64_t offset, uint64_t nbytes)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(copy_req), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}
/* Nothing to reset on a RAM disk; report success immediately. */
static int
blockdev_malloc_reset(struct malloc_disk *mdisk, struct copy_task *copy_req)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(copy_req), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}
/* Dispatch one bdev I/O to the malloc backend.
 * Returns 0 if handled/queued, negative on error or unsupported type. */
static int _blockdev_malloc_submit_request(struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Zero-copy path: when no read buffer was supplied, point
		 * the I/O directly into the backing buffer and complete
		 * immediately - no copy is needed. */
		if (bdev_io->u.read.buf == NULL) {
			bdev_io->u.read.buf = ((struct malloc_disk *)bdev_io->ctx)->malloc_buf +
					      bdev_io->u.read.offset;
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx),
					      SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		return blockdev_malloc_read((struct malloc_disk *)bdev_io->ctx,
					    (struct copy_task *)bdev_io->driver_ctx,
					    bdev_io->u.read.buf,
					    bdev_io->u.read.nbytes,
					    bdev_io->u.read.offset);

	case SPDK_BDEV_IO_TYPE_WRITE:
		return blockdev_malloc_writev((struct malloc_disk *)bdev_io->ctx,
					      (struct copy_task *)bdev_io->driver_ctx,
					      bdev_io->u.write.iovs,
					      bdev_io->u.write.iovcnt,
					      bdev_io->u.write.len,
					      bdev_io->u.write.offset);
	case SPDK_BDEV_IO_TYPE_RESET:
		return blockdev_malloc_reset((struct malloc_disk *)bdev_io->ctx,
					     (struct copy_task *)bdev_io->driver_ctx);
	case SPDK_BDEV_IO_TYPE_FLUSH:
		return blockdev_malloc_flush((struct malloc_disk *)bdev_io->ctx,
					     (struct copy_task *)bdev_io->driver_ctx,
					     bdev_io->u.flush.offset,
					     bdev_io->u.flush.length);
	default:
		return -1;
	}

	/* Unreachable: every case above returns. */
	return 0;
}
/* fn_table entry point: submit an I/O, failing it on dispatch error. */
static void blockdev_malloc_submit_request(struct spdk_bdev_io *bdev_io)
{
	int rc = _blockdev_malloc_submit_request(bdev_io);

	if (rc < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}
/* The malloc backend holds no per-request resources; nothing to free. */
static void blockdev_malloc_free_request(struct spdk_bdev_io *bdev_io)
{
}
/* Driver operations exported to the generic bdev layer. */
static struct spdk_bdev_fn_table malloc_fn_table = {
	.destruct	= blockdev_malloc_destruct,
	.check_io	= blockdev_malloc_check_io,
	.submit_request	= blockdev_malloc_submit_request,
	.free_request	= blockdev_malloc_free_request,
};
/*
 * Create and register a RAM-backed block device.
 *
 * \param num_blocks Number of blocks; must be non-zero.
 * \param block_size Bytes per block; must be a non-zero multiple of 512.
 * \return the new disk, or NULL on invalid parameters or allocation failure.
 */
struct malloc_disk *create_malloc_disk(uint64_t num_blocks, uint32_t block_size)
{
	struct malloc_disk *mdisk;

	/* block_size == 0 must be rejected explicitly: 0 % 512 == 0 would
	 * otherwise slip through and later divide by zero / allocate an
	 * empty buffer. */
	if (block_size == 0 || block_size % 512 != 0) {
		SPDK_ERRLOG("Block size %u is not a multiple of 512.\n", block_size);
		return NULL;
	}

	if (num_blocks == 0) {
		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
		return NULL;
	}

	/* Reject sizes whose total byte count would overflow 64 bits. */
	if (num_blocks > UINT64_MAX / block_size) {
		SPDK_ERRLOG("Disk size overflows: %" PRIu64 " blocks of %u bytes\n",
			    num_blocks, block_size);
		return NULL;
	}

	mdisk = rte_zmalloc(NULL, sizeof(*mdisk), 0);
	if (!mdisk) {
		perror("mdisk");
		return NULL;
	}

	/*
	 * Allocate the large backend memory buffer using rte_malloc(),
	 * so that we guarantee it is allocated from hugepage memory.
	 *
	 * TODO: need to pass a hint so we know which socket to allocate
	 * from on multi-socket systems.
	 */
	mdisk->malloc_buf = rte_zmalloc(NULL, num_blocks * block_size, 2 * 1024 * 1024);
	if (!mdisk->malloc_buf) {
		SPDK_ERRLOG("rte_zmalloc failed\n");
		rte_free(mdisk);
		return NULL;
	}

	snprintf(mdisk->disk.name, SPDK_BDEV_MAX_NAME_LENGTH, "Malloc%d", malloc_disk_count);
	snprintf(mdisk->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH, "Malloc disk");
	malloc_disk_count++;

	mdisk->disk.write_cache = 1;
	mdisk->disk.blocklen = block_size;
	mdisk->disk.blockcnt = num_blocks;
	mdisk->disk.ctxt = mdisk;
	mdisk->disk.fn_table = &malloc_fn_table;

	spdk_bdev_register(&mdisk->disk);

	mdisk->next = g_malloc_disk_head;
	g_malloc_disk_head = mdisk;

	return mdisk;
}
/* Release a malloc disk's backing buffer and the disk structure itself.
 * Does NOT unlink it from the global list; the caller handles that. */
static void free_malloc_disk(struct malloc_disk *mdisk)
{
	rte_free(mdisk->malloc_buf);
	rte_free(mdisk);
}
/* Module init: create malloc disks from the [Malloc] config section.
 * Returns 0 on success (or when no section is present).
 * NOTE(review): failures return positive EINVAL, not -EINVAL — confirm
 * the subsystem init convention expects positive errno values here. */
static int blockdev_malloc_initialize()
{
	struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc");
	int NumberOfLuns, LunSizeInMB, BlockSize, i;
	uint64_t size;
	struct malloc_disk *mdisk;

	if (sp != NULL) {
		NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns");
		LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB");
		BlockSize = spdk_conf_section_get_intval(sp, "BlockSize");

		if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) {
			SPDK_ERRLOG("Malloc section present, but no devices specified\n");
			return EINVAL;
		}

		if (BlockSize < 1) {
			/* Default is 512 bytes */
			BlockSize = 512;
		}

		size = (uint64_t)LunSizeInMB * 1024 * 1024;

		/* Disks already created before a failure are cleaned up by
		 * blockdev_malloc_finish(). */
		for (i = 0; i < NumberOfLuns; i++) {
			mdisk = create_malloc_disk(size / BlockSize, BlockSize);
			if (mdisk == NULL) {
				SPDK_ERRLOG("Could not create malloc disk\n");
				return EINVAL;
			}
		}
	}

	return 0;
}
/* Module teardown: pop and free malloc disks until the list is empty. */
static void blockdev_malloc_finish()
{
	struct malloc_disk *next;

	for (; g_malloc_disk_head != NULL; g_malloc_disk_head = next) {
		next = g_malloc_disk_head->next;
		free_malloc_disk(g_malloc_disk_head);
	}
}
/* Emit a [Malloc] config section reflecting the currently created disks.
 * All LUNs are assumed to share the size of the first one found. */
static void
blockdev_malloc_get_spdk_running_config(FILE *fp)
{
	int num_malloc_luns = 0;
	uint64_t malloc_lun_size = 0;

	/* count number of malloc LUNs, get LUN size */
	struct malloc_disk *mdisk = g_malloc_disk_head;

	while (mdisk != NULL) {
		if (0 == malloc_lun_size) {
			/* assume all malloc luns the same size */
			malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt;
			malloc_lun_size /= (1024 * 1024);	/* bytes -> MB */
		}
		num_malloc_luns++;
		mdisk = mdisk->next;
	}

	if (num_malloc_luns > 0) {
		fprintf(fp,
			"\n"
			"# Users may change this section to create a different number or size of\n"
			"# malloc LUNs.\n"
			"# This will generate %d LUNs with a malloc-allocated backend. Each LUN \n"
			"# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n"
			"# Not all LUNs defined here are necessarily used below.\n"
			"[Malloc]\n"
			"  NumberOfLuns %d\n"
			"  LunSizeInMB %" PRIu64 "\n",
			num_malloc_luns, malloc_lun_size,
			num_malloc_luns - 1, num_malloc_luns,
			malloc_lun_size);
	}
}
SPDK_LOG_REGISTER_TRACE_FLAG("malloc", SPDK_TRACE_MALLOC)

View File

@ -0,0 +1,43 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPDK_BLOCKDEV_MALLOC_H
#define SPDK_BLOCKDEV_MALLOC_H

#include <stdint.h>

struct malloc_disk;

/* Create and register a RAM-backed block device of num_blocks blocks of
 * block_size bytes each.  Returns NULL on invalid parameters or failure. */
struct malloc_disk *create_malloc_disk(uint64_t num_blocks, uint32_t block_size);

#endif /* SPDK_BLOCKDEV_MALLOC_H */

View File

@ -0,0 +1,79 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blockdev_malloc.h"
#include "spdk/log.h"
#include "spdk/rpc.h"
/* Parameters accepted by the construct_malloc_lun RPC. */
struct rpc_construct_malloc {
	uint32_t num_blocks;	/* disk size in blocks */
	uint32_t block_size;	/* bytes per block */
};

/* JSON field decoders mapping the RPC object onto rpc_construct_malloc. */
static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
	{"num_blocks", offsetof(struct rpc_construct_malloc, num_blocks), spdk_json_decode_uint32},
	{"block_size", offsetof(struct rpc_construct_malloc, block_size), spdk_json_decode_uint32},
};
/*
 * RPC handler: decode {num_blocks, block_size}, create a malloc disk, and
 * reply with `true` (no reply is sent for a JSON-RPC notification, i.e.
 * when id is NULL).  Any decode or creation error produces an
 * INVALID_PARAMS error response.
 */
static void
spdk_rpc_construct_malloc_lun(struct spdk_jsonrpc_server_conn *conn,
			      const struct spdk_json_val *params,
			      const struct spdk_json_val *id)
{
	struct rpc_construct_malloc req = {};
	struct spdk_json_write_ctx *w;

	if (spdk_json_decode_object(params, rpc_construct_malloc_decoders,
				    sizeof(rpc_construct_malloc_decoders) / sizeof(*rpc_construct_malloc_decoders),
				    &req)) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n");
		goto invalid;
	}

	if (create_malloc_disk(req.num_blocks, req.block_size) == NULL) {
		goto invalid;
	}

	if (id == NULL) {
		/* Notification: no result object is expected. */
		return;
	}

	w = spdk_jsonrpc_begin_result(conn, id);
	spdk_json_write_bool(w, true);
	spdk_jsonrpc_end_result(conn, w);
	return;

invalid:
	spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
}
SPDK_RPC_REGISTER("construct_malloc_lun", spdk_rpc_construct_malloc_lun)

41
lib/bdev/nvme/Makefile Normal file
View File

@ -0,0 +1,41 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Standard SPDK library build: pull in the common definitions, list the
# sources, and let spdk.lib.mk produce the bdev_nvme library.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

# DPDK headers are needed for rte_ring/rte_mempool/rte_malloc.
CFLAGS += $(DPDK_INC)

C_SRCS = blockdev_nvme.c
LIBNAME = bdev_nvme

include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

View File

@ -0,0 +1,666 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>
#include <sys/param.h>
#include <pthread.h>
#include <rte_config.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include "spdk/conf.h"
#include "spdk/pci.h"
#include "spdk/log.h"
#include "spdk/bdev.h"
#include "spdk/nvme.h"
#define MAX_NVME_NAME_LENGTH 64
void init_request_mempool(void);
static void blockdev_nvme_get_spdk_running_config(FILE *fp);
/* One attached NVMe controller. */
struct nvme_device {
	/**
	 * points to pinned, physically contiguous memory region;
	 * contains 4KB IDENTIFY structure for controller which is
	 * target for CONTROLLER IDENTIFY command during initialization
	 *
	 * NOTE(review): the paragraph above looks copy-pasted from an
	 * IDENTIFY buffer field — ctrlr here is the controller handle;
	 * confirm and trim the stale description.
	 */
	struct spdk_nvme_ctrlr *ctrlr;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_device) tailq;

	int id;	/* sequential controller index — presumably assigned from
		 * nvme_controller_index at attach; confirm at attach site */
};
/* One exported block device backed by (a range of) an NVMe namespace.
 * The embedded spdk_bdev must stay the first member so generic bdev
 * pointers can be cast back to nvme_blockdev. */
struct nvme_blockdev {
	struct spdk_bdev disk;		/* generic bdev; must be first */
	struct spdk_nvme_ctrlr *ctrlr;	/* owning controller */
	struct spdk_nvme_ns *ns;	/* backing namespace */
	struct spdk_nvme_qpair *qpair;	/* I/O queue pair for this blockdev */
	uint64_t lba_start;	/* first LBA of this blockdev's slice — TODO confirm */
	uint64_t lba_end;	/* last LBA of the slice — TODO confirm inclusive/exclusive */
	uint64_t blocklen;	/* namespace sector size in bytes */
};
/* Maximum number of unmap descriptors handled per I/O. */
#define NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT	1

/* Per-I/O driver context carried inside each spdk_bdev_io. */
struct nvme_blockio {
	struct spdk_nvme_dsm_range dsm_range[NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT];
};
/* Transfer direction passed to nvme_queue_cmd(). */
enum data_direction {
	BDEV_DISK_READ = 0,
	BDEV_DISK_WRITE = 1
};
/* A PCI bus:device.function address plus display name; used to restrict
 * which NVMe controllers the driver is allowed to claim. */
struct nvme_bdf_whitelist {
	uint16_t domain;
	uint8_t bus;
	uint8_t dev;
	uint8_t func;
	char name[MAX_NVME_NAME_LENGTH];
};
#define NVME_MAX_BLOCKDEVS_PER_CONTROLLER	256
#define NVME_MAX_CONTROLLERS			16
#define NVME_MAX_BLOCKDEVS			(NVME_MAX_BLOCKDEVS_PER_CONTROLLER * NVME_MAX_CONTROLLERS)

/* Static pool of exported block devices and module configuration knobs. */
static struct nvme_blockdev g_blockdev[NVME_MAX_BLOCKDEVS];
static int blockdev_index_max = 0;	/* number of entries of g_blockdev in use */
static int nvme_luns_per_ns = 1;	/* blockdevs carved out of each namespace */
static int nvme_controller_index = 0;
static int LunSizeInMB = 0;
static int num_controllers = -1;	/* -1: claim every probed controller */
static int unbindfromkernel = 0;

/* List of attached controllers.  (Removed a stray double semicolon that
 * previously followed the initializer.) */
static TAILQ_HEAD(, nvme_device) g_nvme_devices = TAILQ_HEAD_INITIALIZER(g_nvme_devices);

static void nvme_ctrlr_initialize_blockdevs(struct spdk_nvme_ctrlr *ctrlr,
		int bdev_per_ns, int ctrlr_id);
static int nvme_library_init(void);
static void nvme_library_fini(void);
int nvme_queue_cmd(struct nvme_blockdev *bdev, struct nvme_blockio *bio,
		   int direction, void *buf, uint64_t nbytes, uint64_t offset);
/* Per-I/O driver context must hold one nvme_blockio. */
static int
nvme_get_ctx_size(void)
{
	return sizeof(struct nvme_blockio);
}

SPDK_BDEV_MODULE_REGISTER(nvme_library_init, NULL, blockdev_nvme_get_spdk_running_config,
			  nvme_get_ctx_size)
/*
 * Queue an NVMe read.  Returns the number of bytes requested on
 * successful submission, or -1 on failure.
 */
static int64_t
blockdev_nvme_read(struct nvme_blockdev *nbdev, struct nvme_blockio *bio,
		   void *buf, uint64_t nbytes, off_t offset)
{
	SPDK_TRACELOG(SPDK_TRACE_NVME, "read %lu bytes with offset %#lx to %p\n",
		      nbytes, offset, buf);

	if (nvme_queue_cmd(nbdev, bio, BDEV_DISK_READ, buf, nbytes, offset) < 0) {
		return -1;
	}

	return nbytes;
}
/* Queue an NVMe write from an iovec.  Only single-element iovecs are
 * supported; anything else fails (-1).  Returns bytes written on
 * successful submission.
 * NOTE: the trace fires at submit time, before the write completes. */
static int64_t
blockdev_nvme_writev(struct nvme_blockdev *nbdev, struct nvme_blockio *bio,
		     struct iovec *iov, int iovcnt, size_t len, off_t offset)
{
	int64_t rc;

	if ((iovcnt != 1) || (iov->iov_len != len))
		return -1;

	SPDK_TRACELOG(SPDK_TRACE_NVME, "write %lu bytes with offset %#lx from %p\n",
		      iov->iov_len, offset, iov->iov_base);

	rc = nvme_queue_cmd(nbdev, bio, BDEV_DISK_WRITE, (void *)iov->iov_base,
			    iov->iov_len, offset);
	if (rc < 0)
		return -1;

	return iov->iov_len;
}
/* Poll this blockdev's queue pair for NVMe completions (0 = no limit). */
static int
blockdev_nvme_check_io(struct spdk_bdev *bdev)
{
	struct nvme_blockdev *nbdev = (struct nvme_blockdev *)bdev;

	spdk_nvme_qpair_process_completions(nbdev->qpair, 0);
	return 0;
}
/* NVMe blockdevs live in the static g_blockdev array; nothing to free. */
static int
blockdev_nvme_destruct(struct spdk_bdev *bdev)
{
	return 0;
}
/* Flush is completed immediately without issuing an NVMe FLUSH command.
 * NOTE(review): presumably relies on write-cache configuration; confirm
 * whether a real FLUSH should be sent for volatile-cache devices. */
static int
blockdev_nvme_flush(struct nvme_blockdev *nbdev, struct nvme_blockio *bio,
		    uint64_t offset, uint64_t nbytes)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}
/*
 * Reset the owning NVMe controller and report the outcome through the
 * bdev completion.  Returns the controller reset status.
 */
static int
blockdev_nvme_reset(struct nvme_blockdev *nbdev, struct nvme_blockio *bio)
{
	int rc = spdk_nvme_ctrlr_reset(nbdev->ctrlr);

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio),
			      rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);

	return rc;
}
static int
blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct nvme_blockio *bio,
struct spdk_scsi_unmap_bdesc *umap_d,
uint16_t bdesc_count);
/* Buffer-ready callback for reads: the rbuf is attached now, so issue
 * the actual NVMe read; fail the I/O if submission errors out. */
static void blockdev_nvme_get_rbuf_cb(struct spdk_bdev_io *bdev_io)
{
	if (blockdev_nvme_read((struct nvme_blockdev *)bdev_io->ctx,
			       (struct nvme_blockio *)bdev_io->driver_ctx,
			       bdev_io->u.read.buf,
			       bdev_io->u.read.nbytes,
			       bdev_io->u.read.offset) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}
/*
 * Dispatch a bdev I/O to the matching NVMe handler.
 *
 * Returns 0 when the request was accepted (completion is reported
 * asynchronously via spdk_bdev_io_complete()) and a negative value when
 * submission failed or the I/O type is unsupported.
 *
 * Fix: removed the unreachable "return 0;" that followed the switch —
 * every case (including default) already returns.
 */
static int _blockdev_nvme_submit_request(struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* The read buffer may need allocation first; the NVMe read is
		 * issued from blockdev_nvme_get_rbuf_cb(). */
		spdk_bdev_io_get_rbuf(bdev_io, blockdev_nvme_get_rbuf_cb);
		return 0;

	case SPDK_BDEV_IO_TYPE_WRITE:
		return blockdev_nvme_writev((struct nvme_blockdev *)bdev_io->ctx,
					    (struct nvme_blockio *)bdev_io->driver_ctx,
					    bdev_io->u.write.iovs,
					    bdev_io->u.write.iovcnt,
					    bdev_io->u.write.len,
					    bdev_io->u.write.offset);

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return blockdev_nvme_unmap((struct nvme_blockdev *)bdev_io->ctx,
					   (struct nvme_blockio *)bdev_io->driver_ctx,
					   bdev_io->u.unmap.unmap_bdesc,
					   bdev_io->u.unmap.bdesc_count);

	case SPDK_BDEV_IO_TYPE_RESET:
		return blockdev_nvme_reset((struct nvme_blockdev *)bdev_io->ctx,
					   (struct nvme_blockio *)bdev_io->driver_ctx);

	case SPDK_BDEV_IO_TYPE_FLUSH:
		return blockdev_nvme_flush((struct nvme_blockdev *)bdev_io->ctx,
					   (struct nvme_blockio *)bdev_io->driver_ctx,
					   bdev_io->u.flush.offset,
					   bdev_io->u.flush.length);

	default:
		return -1;
	}
}
/* bdev submit hook: fail the I/O up front if submission is rejected. */
static void blockdev_nvme_submit_request(struct spdk_bdev_io *bdev_io)
{
	int rc = _blockdev_nvme_submit_request(bdev_io);

	if (rc < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}
/* No per-request resources are allocated by this driver; nothing to free. */
static void blockdev_nvme_free_request(struct spdk_bdev_io *bdev_io)
{
}
/* Function table hooking NVMe blockdevs into the generic bdev layer
 * (installed on each disk via bdev->disk.fn_table in
 * nvme_ctrlr_initialize_blockdevs()). */
static struct spdk_bdev_fn_table nvmelib_fn_table = {
	.destruct	= blockdev_nvme_destruct,
	.check_io	= blockdev_nvme_check_io,
	.submit_request	= blockdev_nvme_submit_request,
	.free_request	= blockdev_nvme_free_request,
};
/* Context handed to the spdk_nvme_probe() callbacks (probe_cb/attach_cb). */
struct nvme_probe_ctx {
	/* Controllers still allowed to attach; -1 means "attach all"
	 * (see the NumControllers handling in nvme_library_init()). */
	int controllers_remaining;
	/* Number of valid entries in whitelist[]; 0 disables filtering. */
	int num_whitelist_controllers;
	struct nvme_bdf_whitelist whitelist[NVME_MAX_CONTROLLERS];
};
/*
 * spdk_nvme_probe() callback: decide whether to claim a discovered NVMe
 * controller.  A controller is claimed when the attach budget is not
 * exhausted, it matches the BDF whitelist (if one is configured), and it
 * can be bound to a uio driver and claimed against other processes.
 *
 * Fix: the whitelist scan previously iterated over all
 * NVME_MAX_CONTROLLERS slots, comparing against uninitialized entries
 * beyond num_whitelist_controllers (undefined behavior, and a garbage
 * entry could spuriously match).  The loop is now bounded by the number
 * of entries actually populated in nvme_library_init().
 */
static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;
	uint16_t found_domain = spdk_pci_device_get_domain(pci_dev);
	uint8_t found_bus = spdk_pci_device_get_bus(pci_dev);
	uint8_t found_dev = spdk_pci_device_get_dev(pci_dev);
	uint8_t found_func = spdk_pci_device_get_func(pci_dev);
	int i;
	bool claim_device = false;

	SPDK_NOTICELOG("Probing device %x:%x:%x.%x\n",
		       found_domain, found_bus, found_dev, found_func);

	/* 0 = attach budget exhausted; -1 = unlimited. */
	if (ctx->controllers_remaining == 0) {
		return false;
	}

	if (ctx->num_whitelist_controllers == 0) {
		/* No whitelist configured: claim every controller found. */
		claim_device = true;
	} else {
		/* Only scan the populated whitelist entries. */
		for (i = 0; i < ctx->num_whitelist_controllers; i++) {
			if (found_domain == ctx->whitelist[i].domain &&
			    found_bus == ctx->whitelist[i].bus &&
			    found_dev == ctx->whitelist[i].dev &&
			    found_func == ctx->whitelist[i].func) {
				claim_device = true;
				break;
			}
		}
	}

	if (!claim_device) {
		return false;
	}

	if (spdk_pci_device_has_non_uio_driver(pci_dev)) {
		/* NVMe kernel driver case */
		if (unbindfromkernel || ctx->num_whitelist_controllers > 0) {
			if (spdk_pci_device_switch_to_uio_driver(pci_dev)) {
				return false;
			}
		} else {
			SPDK_WARNLOG("Device has kernel nvme driver attached, skipping...\n");
			return false;
		}
	} else {
		if (spdk_pci_device_bind_uio_driver(pci_dev)) {
			SPDK_WARNLOG("Device %s %d:%d:%d bind to uio driver failed\n",
				     spdk_pci_device_get_device_name(pci_dev),
				     spdk_pci_device_get_bus(pci_dev),
				     spdk_pci_device_get_dev(pci_dev),
				     spdk_pci_device_get_func(pci_dev));
			return false;
		}
	}

	/* Claim the device in case conflict with other process */
	if (spdk_pci_device_claim(pci_dev) != 0) {
		return false;
	}

	return true;
}
/*
 * spdk_nvme_probe() callback invoked after a controller has attached:
 * record it on the global device list, create its blockdevs, and consume
 * one unit of the attach budget (unless the budget is unlimited, i.e. -1).
 */
static void
attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_nvme_ctrlr *ctrlr,
	  const struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;
	struct nvme_device *dev = malloc(sizeof(struct nvme_device));

	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return;
	}

	dev->ctrlr = ctrlr;
	dev->id = nvme_controller_index++;

	nvme_ctrlr_initialize_blockdevs(dev->ctrlr, nvme_luns_per_ns, dev->id);
	TAILQ_INSERT_TAIL(&g_nvme_devices, dev, tailq);

	if (ctx->controllers_remaining > 0) {
		ctx->controllers_remaining--;
	}
}
static int
nvme_library_init(void)
{
struct spdk_conf_section *sp;
const char *val;
int i, rc;
struct nvme_probe_ctx probe_ctx;
sp = spdk_conf_find_section(NULL, "Nvme");
if (sp == NULL) {
/*
* If configuration file did not specify the Nvme section, do
* not take the time to initialize the NVMe devices.
*/
return 0;
}
init_request_mempool();
nvme_luns_per_ns = spdk_conf_section_get_intval(sp, "NvmeLunsPerNs");
if (nvme_luns_per_ns < 1)
nvme_luns_per_ns = 1;
if (nvme_luns_per_ns > NVME_MAX_BLOCKDEVS_PER_CONTROLLER) {
SPDK_ERRLOG("The input value nvme_luns_per_ns(%d) exceeds the maximal "
"value(%d)\n", nvme_luns_per_ns, NVME_MAX_BLOCKDEVS_PER_CONTROLLER);
return -1;
}
LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB");
if (LunSizeInMB < 0)
LunSizeInMB = 0;
spdk_nvme_retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount");
if (spdk_nvme_retry_count < 0)
spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
/*
* If NumControllers is not found, this will return -1, which we
* will later use to denote that we should initialize all
* controllers.
*/
num_controllers = spdk_conf_section_get_intval(sp, "NumControllers");
val = spdk_conf_section_get_val(sp, "UnbindFromKernel");
if (val != NULL) {
if (!strcmp(val, "Yes")) {
unbindfromkernel = 1;
}
}
/* Init the whitelist */
probe_ctx.num_whitelist_controllers = 0;
if (num_controllers > 0) {
for (i = 0; ; i++) {
unsigned int domain, bus, dev, func;
val = spdk_conf_section_get_nmval(sp, "BDF", i, 0);
if (val == NULL) {
break;
}
rc = sscanf(val, "%x:%x:%x.%x", &domain, &bus, &dev, &func);
if (rc != 4) {
SPDK_ERRLOG("Invalid format for BDF: %s\n", val);
return -1;
}
probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].domain = domain;
probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].bus = bus;
probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].dev = dev;
probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].func = func;
val = spdk_conf_section_get_nmval(sp, "BDF", i, 1);
if (val == NULL) {
SPDK_ERRLOG("BDF section with no device name\n");
return -1;
}
snprintf(probe_ctx.whitelist[probe_ctx.num_whitelist_controllers].name, MAX_NVME_NAME_LENGTH, "%s",
val);
probe_ctx.num_whitelist_controllers++;
}
}
probe_ctx.controllers_remaining = num_controllers;
if (spdk_nvme_probe(&probe_ctx, probe_cb, attach_cb, NULL)) {
return -1;
}
return 0;
}
/* Process-exit cleanup: detach every attached controller and free its
 * tracking struct. */
__attribute__((destructor)) void
nvme_library_fini(void)
{
	struct nvme_device *dev;

	while ((dev = TAILQ_FIRST(&g_nvme_devices)) != NULL) {
		TAILQ_REMOVE(&g_nvme_devices, dev, tailq);
		spdk_nvme_detach(dev->ctrlr);
		free(dev);
	}
}
/*
 * Carve each namespace of the given controller into bdev_per_ns logical
 * blockdevs and register them with the bdev layer.  Blockdevs are taken
 * from the static g_blockdev[] pool, capped at NVME_MAX_BLOCKDEVS total.
 */
void
nvme_ctrlr_initialize_blockdevs(struct spdk_nvme_ctrlr *ctrlr, int bdev_per_ns, int ctrlr_id)
{
	struct nvme_blockdev *bdev;
	struct spdk_nvme_ns *ns;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint64_t bdev_size, lba_offset, sectors_per_stripe;
	int ns_id, num_ns, bdev_idx;
	uint64_t LunSizeInsector;

	num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	/* NVMe namespace IDs are 1-based. */
	for (ns_id = 1; ns_id <= num_ns; ns_id++) {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id);
		/* Split the namespace's sectors evenly among the blockdevs. */
		bdev_size = spdk_nvme_ns_get_num_sectors(ns) / bdev_per_ns;

		/*
		 * Align each blockdev on a 1MB boundary - this helps cover Fultondale case
		 * where I/O that span a 128KB boundary must be split for optimal performance.
		 * Using a 1MB hardcoded boundary here so that we do not have to export
		 * stripe size information from the NVMe driver for now.
		 */
		sectors_per_stripe = (1 << 20) / spdk_nvme_ns_get_sector_size(ns);

		/* Optional per-LUN size cap from the LunSizeInMB config value. */
		LunSizeInsector = ((uint64_t)LunSizeInMB << 20) / spdk_nvme_ns_get_sector_size(ns);
		if ((LunSizeInMB > 0) && (LunSizeInsector < bdev_size))
			bdev_size = LunSizeInsector;

		/* Round down to a whole number of 1MB stripes.
		 * NOTE(review): assumes sectors_per_stripe is a power of two,
		 * i.e. the sector size is a power of two — confirm. */
		bdev_size &= ~(sectors_per_stripe - 1);

		lba_offset = 0;
		for (bdev_idx = 0; bdev_idx < bdev_per_ns; bdev_idx++) {
			if (blockdev_index_max >= NVME_MAX_BLOCKDEVS)
				return;

			bdev = &g_blockdev[blockdev_index_max];
			bdev->ctrlr = ctrlr;
			bdev->ns = ns;
			/* Each blockdev covers a contiguous, non-overlapping LBA range. */
			bdev->lba_start = lba_offset;
			bdev->lba_end = lba_offset + bdev_size - 1;
			lba_offset += bdev_size;

			snprintf(bdev->disk.name, SPDK_BDEV_MAX_NAME_LENGTH,
				 "Nvme%dn%dp%d", ctrlr_id, spdk_nvme_ns_get_id(ns), bdev_idx);
			snprintf(bdev->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH,
				 "iSCSI NVMe disk");

			bdev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, 0);
			if (!bdev->qpair) {
				/* Skip registering this blockdev; the g_blockdev slot is reused
				 * since blockdev_index_max is not incremented. */
				SPDK_ERRLOG("Could not allocate I/O queue pair for %s\n",
					    bdev->disk.name);
				continue;
			}

			if (cdata->oncs.dsm) {
				/*
				 * Enable the thin provisioning
				 * if nvme controller supports
				 * DataSet Management command.
				 */
				bdev->disk.thin_provisioning = 1;
				bdev->disk.max_unmap_bdesc_count =
					NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT;
			}

			bdev->disk.write_cache = 1;
			/* blocklen mirrors the namespace sector size. */
			bdev->blocklen = spdk_nvme_ns_get_sector_size(ns);
			bdev->disk.blocklen = bdev->blocklen;
			bdev->disk.blockcnt = bdev->lba_end - bdev->lba_start + 1;
			bdev->disk.ctxt = bdev;
			bdev->disk.fn_table = &nvmelib_fn_table;
			spdk_bdev_register(&bdev->disk);
			blockdev_index_max++;
		}
	}
}
/* NVMe command completion callback: map the NVMe completion status onto a
 * bdev I/O status and complete the bdev I/O. */
static void
queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_blockio *bio = ref;

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio),
			      spdk_nvme_cpl_is_error(cpl) ?
			      SPDK_BDEV_IO_STATUS_FAILED :
			      SPDK_BDEV_IO_STATUS_SUCCESS);
}
/*
 * Translate a byte-addressed read/write into an LBA-addressed NVMe command
 * on this blockdev's queue pair.  Completion is reported via queued_done().
 * Returns 0 on successful submission, negative on error.
 *
 * Fixes:
 *  - An offset that was not sector-aligned was previously truncated
 *    silently to a lower LBA (offset / blocklen); it is now rejected,
 *    matching the existing treatment of unaligned lengths.
 *  - Uses the sector size consistently for both length and offset math
 *    (bdev->blocklen is set to the namespace sector size at init time).
 */
int
nvme_queue_cmd(struct nvme_blockdev *bdev, struct nvme_blockio *bio,
	       int direction, void *buf, uint64_t nbytes, uint64_t offset)
{
	uint32_t ss = spdk_nvme_ns_get_sector_size(bdev->ns);
	uint32_t lba_count;
	uint64_t relative_lba;
	uint64_t next_lba;
	int rc;

	if (nbytes % ss) {
		SPDK_ERRLOG("Unaligned IO request length\n");
		return -1;
	}

	if (offset % ss) {
		SPDK_ERRLOG("Unaligned IO request offset\n");
		return -1;
	}

	relative_lba = offset / ss;
	next_lba = relative_lba + bdev->lba_start;
	lba_count = nbytes / ss;

	if (direction == BDEV_DISK_READ) {
		rc = spdk_nvme_ns_cmd_read(bdev->ns, bdev->qpair, buf, next_lba,
					   lba_count, queued_done, bio, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(bdev->ns, bdev->qpair, buf, next_lba,
					    lba_count, queued_done, bio, 0);
	}

	if (rc != 0) {
		SPDK_ERRLOG("IO failed\n");
	}
	return rc;
}
static int
blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct nvme_blockio *bio,
struct spdk_scsi_unmap_bdesc *unmap_d,
uint16_t bdesc_count)
{
int rc = 0, i;
for (i = 0; i < bdesc_count; i++) {
bio->dsm_range[i].starting_lba =
nbdev->lba_start + be64toh(unmap_d->lba);
bio->dsm_range[i].length = be32toh(unmap_d->block_count);
unmap_d++;
}
rc = spdk_nvme_ns_cmd_deallocate(nbdev->ns, nbdev->qpair, bio->dsm_range, bdesc_count,
queued_done, bio);
if (rc != 0)
return -1;
return 0;
}
/* DPDK mempool backing NVMe request allocation (consumed by the NVMe driver). */
struct rte_mempool *request_mempool;

/*
 * Create the NVMe request mempool: 8192 elements of spdk_nvme_request_size()
 * bytes with a 128-entry per-core cache, on any NUMA socket.
 *
 * Fixes: proper C prototype ("(void)" instead of the old-style empty
 * parameter list), and the creation result is now checked and logged —
 * previously a NULL pool was silently stored and would only surface later.
 */
void init_request_mempool(void)
{
	request_mempool = rte_mempool_create("nvme request", 8192,
					     spdk_nvme_request_size(),
					     128, 0, NULL, NULL, NULL, NULL,
					     SOCKET_ID_ANY, 0);
	if (request_mempool == NULL) {
		SPDK_ERRLOG("create nvme request mempool failed\n");
	}
}
/* Dump the effective [Nvme] configuration section to fp so the running
 * config can be reproduced. */
static void
blockdev_nvme_get_spdk_running_config(FILE *fp)
{
	const char *unbind = unbindfromkernel ? "Yes" : "No";

	fprintf(fp,
		"\n"
		"# Users may change this to partition an NVMe namespace into multiple LUNs.\n"
		"[Nvme]\n"
		"  UnbindFromKernel %s\n"
		"  NvmeLunsPerNs %d\n",
		unbind, nvme_luns_per_ns);

	/* -1 means "all controllers" and is the implicit default; omit it. */
	if (num_controllers != -1) {
		fprintf(fp, "  NumControllers %d\n", num_controllers);
	}

	/* 0 means "no cap" and is the implicit default; omit it. */
	if (LunSizeInMB != 0) {
		fprintf(fp, "  LunSizeInMB %d\n", LunSizeInMB);
	}
}
SPDK_LOG_REGISTER_TRACE_FLAG("nvme", SPDK_TRACE_NVME)

17
mk/spdk.modules.mk Normal file
View File

@ -0,0 +1,17 @@
BLOCKDEV_MODULES += $(SPDK_ROOT_DIR)/lib/bdev/malloc/libspdk_bdev_malloc.a
BLOCKDEV_MODULES += $(SPDK_ROOT_DIR)/lib/bdev/nvme/libspdk_bdev_nvme.a \
$(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a
COPY_MODULES += $(SPDK_ROOT_DIR)/lib/copy/ioat/libspdk_copy_ioat.a \
$(SPDK_ROOT_DIR)/lib/ioat/libspdk_ioat.a
BLOCKDEV_MODULES_LINKER_ARGS = -Wl,--whole-archive \
$(BLOCKDEV_MODULES) \
-Wl,--no-whole-archive \
$(BLOCKDEV_MODULES_DEPS)
COPY_MODULES_LINKER_ARGS = -Wl,--whole-archive \
$(COPY_MODULES) \
-Wl,--no-whole-archive \
$(COPY_MODULES_DEPS)

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y = event log json jsonrpc nvme memory ioat
DIRS-y = bdev event log json jsonrpc nvme memory ioat
DIRS-$(CONFIG_RDMA) += nvmf
.PHONY: all clean $(DIRS-y)

44
test/lib/bdev/Makefile Normal file
View File

@ -0,0 +1,44 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y = bdevio bdevperf
.PHONY: all clean $(DIRS-y)
all: $(DIRS-y)
clean: $(DIRS-y)
include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk

10
test/lib/bdev/bdev.conf Normal file
View File

@ -0,0 +1,10 @@
[Nvme]
NvmeLunsPerNs 1
UnbindFromKernel Yes
# autotest.sh will automatically rmmod ioatdma, so we do
# not need to specify UnbindFromKernel and Whitelist
# entries to enable ioat offload for this malloc LUN
[Malloc]
NumberOfLuns 5
LunSizeInMB 32

1
test/lib/bdev/bdevio/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
bdevio

View File

@ -0,0 +1,69 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
APP = bdevio
C_SRCS := bdevio.c
CFLAGS += -I. $(DPDK_INC)
SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/bdev/libspdk_bdev.a \
$(SPDK_ROOT_DIR)/lib/copy/libspdk_copy.a \
$(SPDK_ROOT_DIR)/lib/event/libspdk_event.a \
$(SPDK_ROOT_DIR)/lib/trace/libspdk_trace.a \
$(SPDK_ROOT_DIR)/lib/log/libspdk_log.a \
$(SPDK_ROOT_DIR)/lib/conf/libspdk_conf.a \
$(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \
$(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a \
$(SPDK_ROOT_DIR)/lib/rpc/libspdk_rpc.a \
$(SPDK_ROOT_DIR)/lib/jsonrpc/libspdk_jsonrpc.a \
$(SPDK_ROOT_DIR)/lib/json/libspdk_json.a
LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
$(COPY_MODULES_LINKER_ARGS)
LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) $(DPDK_LIB) -lcunit
all : $(APP)
$(APP) : $(OBJS) $(SPDK_LIBS) $(BLOCKDEV_MODULES) $(LINKER_MODULES)
$(LINK_C)
clean :
$(CLEAN_C) $(APP)
include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk

View File

@ -0,0 +1,547 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_debug.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include "spdk/bdev.h"
#include "spdk/bdev_db.h"
#include "spdk/copy_engine.h"
#include "spdk/log.h"
#include "CUnit/Basic.h"
#define BUFFER_SIZE 260 * 1024
#define BDEV_TASK_ARRAY_SIZE 2048
#include "../common.c"
/* Singly-linked list node tracking one blockdev under test. */
struct io_target {
	struct spdk_bdev *bdev;
	struct io_target *next;
};

/* Head of the target list built by bdevio_construct_targets(). */
struct io_target *g_io_targets = NULL;
/*
 * Build the global io_target list from every unclaimed blockdev in g_bdevs.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int
bdevio_construct_targets(void)
{
	struct blockdev_entry *entry;
	struct io_target *target;

	for (entry = g_bdevs; entry != NULL; entry = entry->next) {
		struct spdk_bdev *bdev = entry->bdev;

		/* Blockdevs already claimed by another user are not tested. */
		if (bdev->claimed) {
			continue;
		}

		target = malloc(sizeof(struct io_target));
		if (target == NULL) {
			return -ENOMEM;
		}

		/* Push onto the head of the global list. */
		target->bdev = bdev;
		target->next = g_io_targets;
		g_io_targets = target;
	}

	return 0;
}
/* Completion state shared between the submit helpers and
 * quick_test_complete(); valid for one outstanding I/O at a time. */
static int complete;
static enum spdk_bdev_io_status completion_status_per_io;
/*
 * Allocate a 4KB-aligned DPDK buffer of the given size and fill it with
 * the byte pattern.  *buf is NULL on allocation failure.
 *
 * Fix: the rte_malloc() result was previously passed to memset() without
 * a NULL check, dereferencing NULL on allocation failure.
 */
static void
initialize_buffer(char **buf, int pattern, int size)
{
	*buf = rte_malloc(NULL, size, 0x1000);
	if (*buf == NULL) {
		SPDK_ERRLOG("could not allocate %d-byte test buffer\n", size);
		return;
	}
	memset(*buf, pattern, size);
}
/* I/O completion event handler: record the status, signal the polling
 * loop, and release the bdev I/O. */
static void
quick_test_complete(spdk_event_t event)
{
	struct spdk_bdev_io *done_io = spdk_event_get_arg2(event);

	completion_status_per_io = done_io->status;
	complete = 1;

	spdk_bdev_free_io(done_io);
}
/* Poll every blockdev and drain the event queue until quick_test_complete()
 * flips the global `complete` flag.  Always returns 0. */
static int
check_io_completion(void)
{
	struct blockdev_entry *entry;

	while (!complete) {
		for (entry = g_bdevs; entry != NULL; entry = entry->next) {
			spdk_bdev_do_work(entry->bdev);
		}
		spdk_event_queue_run_all(rte_lcore_id());
	}

	return 0;
}
/* Single global iovec reused by blockdev_write(); one I/O in flight at a time. */
struct iovec iov;
static int
blockdev_write(struct io_target *target, void *bdev_task_ctx, char **tx_buf,
int data_len, uint64_t offset)
{
struct spdk_bdev_io *bdev_io;
complete = 0;
completion_status_per_io = SPDK_BDEV_IO_STATUS_FAILED;
iov.iov_base = *tx_buf;
iov.iov_len = data_len;
bdev_io = spdk_bdev_writev(target->bdev, &iov, 1, iov.iov_len,
(uint64_t)offset, quick_test_complete,
bdev_task_ctx);
if (!bdev_io) {
return -1;
}
return data_len;
}
static int
blockdev_read(struct io_target *target, void *bdev_task_ctx, char **rx_buf,
int data_len, uint64_t offset)
{
struct spdk_bdev_io *bdev_io;
complete = 0;
completion_status_per_io = SPDK_BDEV_IO_STATUS_FAILED;
bdev_io = spdk_bdev_read(target->bdev, *rx_buf, data_len, offset,
quick_test_complete, bdev_task_ctx);
if (!bdev_io) {
return -1;
}
return data_len;
}
/* Compare the rx and tx buffers, then free both.  Returns the memcmp()
 * result (0 on match). */
static int
blockdev_write_read_data_match(char **rx_buf, char **tx_buf, int data_length)
{
	int mismatch = memcmp(*rx_buf, *tx_buf, data_length);

	rte_free(*rx_buf);
	rte_free(*tx_buf);

	return mismatch;
}
/*
 * Core write-then-read-then-compare exerciser, run against every target.
 * Writes data_length bytes of `pattern` at `offset`, reads them back, and
 * (on success) compares the buffers.  expected_rc is the expected return
 * of both blockdev_write() and blockdev_read() (data_length for valid
 * parameters, -1 for invalid ones).
 */
static void
blockdev_write_read(uint32_t data_length, int pattern, uint64_t offset,
		    int expected_rc)
{
	struct io_target *target;
	char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE];
	char *tx_buf = NULL;
	char *rx_buf = NULL;
	int rc;

	target = g_io_targets;
	while (target != NULL) {
		/* Skip targets whose block size exceeds the requested length;
		 * such an I/O could never be valid for them. */
		if (data_length < target->bdev->blocklen) {
			target = target->next;
			continue;
		}
		initialize_buffer(&tx_buf, pattern, data_length);
		initialize_buffer(&rx_buf, 0, data_length);

		rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf,
				    data_length, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, expected_rc);

		/* If the write was successful, the function returns the data_length
		 * and the completion_status_per_io is 0 */
		if (rc < (int)data_length) {
			/* Submission failed: blockdev_write() pre-set the status to FAILED. */
			CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			/* Poll until quick_test_complete() records the real status. */
			check_io_completion();
			CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		}

		rc = blockdev_read(target, (void *)bdev_task_ctx, &rx_buf,
				   data_length, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, expected_rc);

		/* If the read was successful, the function returns the data_length
		 * and the completion_status_per_io is 0 */
		if (rc < (int)data_length) {
			CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			check_io_completion();
			CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		}

		/* Only compare (and free) the buffers when the read completed. */
		if (completion_status_per_io == SPDK_BDEV_IO_STATUS_SUCCESS) {
			rc = blockdev_write_read_data_match(&rx_buf, &tx_buf, data_length);
			/* Assert the write by comparing it with values read
			 * from each blockdev */
			CU_ASSERT_EQUAL(rc, 0);
		}

		target = target->next;
	}
}
/* Valid 4K I/O at offset 0: expect both write and read to return the full
 * data length. */
static void
blockdev_write_read_4k(void)
{
	uint32_t data_length = 4096;
	uint64_t offset = 0;
	int pattern = 0xA3;

	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);

	/* Params are valid, so the expected rc is the data length itself. */
	blockdev_write_read(data_length, pattern, offset, (int)data_length);
}
/* Valid 512-byte I/O at offset 2048: expect both write and read to return
 * the full data length. */
static void
blockdev_write_read_512Bytes(void)
{
	uint32_t data_length = 512;
	uint64_t offset = 2048;
	int pattern = 0xA3;

	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);

	/* Params are valid, so the expected rc is the data length itself. */
	blockdev_write_read(data_length, pattern, offset, (int)data_length);
}
/* Valid 132K (135168-byte) I/O — larger than a 128K stripe — at offset
 * 2048: expect both write and read to return the full data length. */
static void
blockdev_write_read_size_gt_128k(void)
{
	uint32_t data_length = 135168;
	uint64_t offset = 2048;
	int pattern = 0xA3;

	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);

	/* Params are valid, so the expected rc is the data length itself. */
	blockdev_write_read(data_length, pattern, offset, (int)data_length);
}
/* I/O length 0x1015 is not a multiple of the block size: expect both write
 * and read to fail with rc < 0. */
static void
blockdev_write_read_invalid_size(void)
{
	uint32_t data_length = 0x1015;
	uint64_t offset = 2048;
	int pattern = 0xA3;

	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);

	blockdev_write_read(data_length, pattern, offset, -1);
}
/*
 * Boundary test: a one-block I/O at the very last block of each blockdev,
 * so that offset + nbytes lands exactly on the end of the device.  Both
 * the write and the read are expected to succeed, and the data must match.
 */
static void
blockdev_write_read_offset_plus_nbytes_equals_bdev_size(void)
{
	struct io_target *target;
	struct spdk_bdev *bdev;
	char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE];
	char *tx_buf = NULL;
	char *rx_buf = NULL;
	uint64_t offset;
	int rc;

	target = g_io_targets;
	while (target != NULL) {
		bdev = target->bdev;

		/* The start offset has been set to a marginal value
		 * such that offset + nbytes == Total size of
		 * blockdev. */
		offset = ((bdev->blockcnt - 1) * bdev->blocklen);

		initialize_buffer(&tx_buf, 0xA3, bdev->blocklen);
		initialize_buffer(&rx_buf, 0, bdev->blocklen);

		rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf,
				    bdev->blocklen, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, (int)bdev->blocklen);

		/* If the write was successful, the function returns the data_length
		 * and the completion_status_per_io is 0 */
		check_io_completion();
		CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS);

		rc = blockdev_read(target, (void *)bdev_task_ctx, &rx_buf,
				   bdev->blocklen, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, (int)bdev->blocklen);

		/* If the read was successful, the function returns the data_length
		 * and the completion_status_per_io is 0 */
		check_io_completion();
		CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_SUCCESS);

		rc = blockdev_write_read_data_match(&rx_buf, &tx_buf, bdev->blocklen);

		/* Assert the write by comparing it with values read
		 * from each blockdev */
		CU_ASSERT_EQUAL(rc, 0);

		target = target->next;
	}
}
/*
 * Overflow test: a 4K I/O starting 1024 bytes before the end of each
 * blockdev, so offset + nbytes runs past the device.  Both the write and
 * the read are expected to be rejected (rc == -1) with a FAILED status.
 */
static void
blockdev_write_read_offset_plus_nbytes_gt_bdev_size(void)
{
	struct io_target *target;
	struct spdk_bdev *bdev;
	char bdev_task_ctx[BDEV_TASK_ARRAY_SIZE];
	char *tx_buf = NULL;
	char *rx_buf = NULL;
	int data_length;
	uint64_t offset;
	int pattern;
	int expected_rc;
	int rc;

	/* Tests the overflow condition of the blockdevs. */
	data_length = 4096;
	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
	pattern = 0xA3;

	/* Params are invalid, hence the expected return value
	 * of write and read is < 0.*/
	expected_rc = -1;

	target = g_io_targets;
	while (target != NULL) {
		bdev = target->bdev;

		/* The start offset has been set to a valid value
		 * but offset + nbytes is greater than the Total size
		 * of the blockdev. The test should fail. */
		offset = ((bdev->blockcnt * bdev->blocklen) - 1024);

		initialize_buffer(&tx_buf, pattern, data_length);
		initialize_buffer(&rx_buf, 0, data_length);

		rc = blockdev_write(target, (void *)bdev_task_ctx, &tx_buf,
				    data_length, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, expected_rc);

		/* If the write failed, the function returns rc<data_length
		 * and the completion_status_per_io is SPDK_BDEV_IO_STATUS_FAILED */
		CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED);

		rc = blockdev_read(target, (void *)bdev_task_ctx, &rx_buf,
				   data_length, offset);

		/* Assert the rc of the respective blockdev */
		CU_ASSERT_EQUAL(rc, expected_rc);

		/* If the read failed, the function returns rc<data_length
		 * and the completion_status_per_io is SPDK_BDEV_IO_STATUS_FAILED */
		CU_ASSERT_EQUAL(completion_status_per_io, SPDK_BDEV_IO_STATUS_FAILED);

		target = target->next;
	}
}
/* Offset UINT64_MAX makes offset + nbytes wrap around: expect both write
 * and read to fail with rc < 0. */
static void
blockdev_write_read_max_offset(void)
{
	int data_length = 4096;
	uint64_t offset = UINT64_MAX;
	int pattern = 0xA3;

	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);

	blockdev_write_read(data_length, pattern, offset, -1);
}
/*
 * Write/read 8K at offset 0, then overwrite with a different pattern at an
 * offset overlapping the first range and verify the new data reads back.
 */
static void
blockdev_overlapped_write_read_8k(void)
{
	int data_length;
	uint64_t offset;
	int pattern;
	int expected_rc;

	/* Data size = 8K */
	data_length = 8192;
	CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
	offset = 0;
	pattern = 0xA3;
	/* Params are valid, hence the expected return value
	 * of write and read for all blockdevs is the data_length */
	expected_rc = data_length;
	/* Assert the write by comparing it with values read
	 * from the same offset for each blockdev */
	blockdev_write_read(data_length, pattern, offset, expected_rc);

	/* Overwrite the pattern 0xbb of size 8K on an address offset overlapping
	 * with the address written above and assert the new value in
	 * the overlapped address range */
	/* Populate 8k with value 0xBB */
	pattern = 0xBB;
	/* Offset = 4096; overlaps the second half of the first write. */
	offset = 4096;
	/* Assert the write by comparing it with values read
	 * from the overlapped offset for each blockdev */
	blockdev_write_read(data_length, pattern, offset, expected_rc);
}
int
main(int argc, char **argv)
{
CU_pSuite suite = NULL;
const char *config_file;
unsigned int num_failures;
if (argc == 1) {
config_file = "/usr/local/etc/spdk/iscsi.conf";
} else {
config_file = argv[1];
}
bdevtest_init(config_file, "0x1");
if (bdevio_construct_targets() < 0) {
return 1;
}
if (CU_initialize_registry() != CUE_SUCCESS) {
return CU_get_error();
}
suite = CU_add_suite("components_suite", NULL, NULL);
if (suite == NULL) {
CU_cleanup_registry();
return CU_get_error();
}
if (
CU_add_test(suite, "blockdev write read 4k", blockdev_write_read_4k) == NULL
|| CU_add_test(suite, "blockdev write read 512 bytes",
blockdev_write_read_512Bytes) == NULL
|| CU_add_test(suite, "blockdev write read size > 128k",
blockdev_write_read_size_gt_128k) == NULL
|| CU_add_test(suite, "blockdev write read invalid size",
blockdev_write_read_invalid_size) == NULL
|| CU_add_test(suite, "blockdev write read offset + nbytes == size of blockdev",
blockdev_write_read_offset_plus_nbytes_equals_bdev_size) == NULL
|| CU_add_test(suite, "blockdev write read offset + nbytes > size of blockdev",
blockdev_write_read_offset_plus_nbytes_gt_bdev_size) == NULL
|| CU_add_test(suite, "blockdev write read max offset",
blockdev_write_read_max_offset) == NULL
|| CU_add_test(suite, "blockdev write read 8k on overlapped address offset",
blockdev_overlapped_write_read_8k) == NULL
) {
CU_cleanup_registry();
return CU_get_error();
}
CU_basic_set_mode(CU_BRM_VERBOSE);
CU_basic_run_tests();
num_failures = CU_get_number_of_failures();
CU_cleanup_registry();
return num_failures;
}

1
test/lib/bdev/bdevperf/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
bdevperf

View File

@ -0,0 +1,69 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Locate the SPDK repository root relative to this Makefile.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk

APP = bdevperf

C_SRCS := bdevperf.c

CFLAGS += -I. $(DPDK_INC)

# SPDK static libraries the bdevperf tool links against.
SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/bdev/libspdk_bdev.a \
	     $(SPDK_ROOT_DIR)/lib/copy/libspdk_copy.a \
	     $(SPDK_ROOT_DIR)/lib/event/libspdk_event.a \
	     $(SPDK_ROOT_DIR)/lib/trace/libspdk_trace.a \
	     $(SPDK_ROOT_DIR)/lib/log/libspdk_log.a \
	     $(SPDK_ROOT_DIR)/lib/conf/libspdk_conf.a \
	     $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \
	     $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a \
	     $(SPDK_ROOT_DIR)/lib/rpc/libspdk_rpc.a \
	     $(SPDK_ROOT_DIR)/lib/jsonrpc/libspdk_jsonrpc.a \
	     $(SPDK_ROOT_DIR)/lib/json/libspdk_json.a

# Pull in blockdev and copy-engine module link arguments from spdk.modules.mk.
LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
	$(COPY_MODULES_LINKER_ARGS)
LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) $(DPDK_LIB)

all : $(APP)

$(APP) : $(OBJS) $(SPDK_LIBS) $(BLOCKDEV_MODULES) $(COPY_MODULES)
	$(LINK_C)

clean :
	$(CLEAN_C) $(APP)

include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk

View File

@ -0,0 +1,684 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_debug.h>
#include <rte_mempool.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_lcore.h>
#include <rte_timer.h>
#include "spdk/bdev.h"
#include "spdk/bdev_db.h"
#include "spdk/copy_engine.h"
#include "spdk/log.h"
/* Per-I/O context, allocated from task_pool; buf is the data buffer. */
struct bdevperf_task {
	struct iovec		iov;
	struct io_target	*target;
	void			*buf;
};

static int g_io_size = 0;
/* initialize to invalid value so we can detect if user overrides it. */
static int g_rw_percentage = -1;
static int g_is_random;
static bool g_verify = false;
static bool g_reset = false;
static bool g_unmap = false;
static int g_queue_depth;
static int g_time_in_sec;
static int g_show_performance_real_time = 0;
static bool g_run_failed = false;
/* Zero-copy reads (NULL rbuf) by default; disabled for large I/O in main(). */
static bool g_zcopy = true;
static struct rte_timer g_perf_timer;

static void bdevperf_submit_single(struct io_target *target);

#include "../common.c"

/* One I/O target per (blockdev, lcore); linked into the per-lcore head[] list. */
struct io_target {
	struct spdk_bdev	*bdev;
	struct io_target	*next;
	unsigned		lcore;
	int			io_completed;
	int			current_queue_depth;
	uint64_t		size_in_ios;	/* device capacity in units of g_io_size */
	uint64_t		offset_in_ios;	/* next sequential I/O index */
	bool			is_draining;	/* stop submitting; wait for completions */
	struct rte_timer	run_timer;	/* ends the run after g_time_in_sec */
	struct rte_timer	reset_timer;	/* periodic reset timer (-w reset) */
};

/* Head of the target list for each lcore. */
struct io_target *head[RTE_MAX_LCORE];
static int g_target_count = 0;

/*
 * Used to determine how the I/O buffers should be aligned.
 * This alignment will be bumped up for blockdevs that
 * require alignment based on block length - for example,
 * AIO blockdevs.
 */
static uint32_t g_min_alignment = 8;
/* Clear every per-lcore target list head before targets are constructed. */
static void
blockdev_heads_init(void)
{
	int lcore;

	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
		head[lcore] = NULL;
	}
}
/*
 * Walk the global blockdev list (g_bdevs, built by the common.c bdev DB
 * stubs) and build one io_target per usable blockdev, distributing
 * targets round-robin across the application's cores.  Claimed
 * blockdevs are skipped, as are blockdevs without unmap support when
 * the unmap workload was requested.
 */
static void
bdevperf_construct_targets(void)
{
	int index = 0;
	struct blockdev_entry *bdev_entry = g_bdevs;
	struct spdk_bdev *bdev;
	struct io_target *target;

	while (bdev_entry != NULL) {
		bdev = bdev_entry->bdev;

		if (bdev->claimed) {
			/* Another consumer owns this blockdev; leave it alone. */
			bdev_entry = bdev_entry->next;
			continue;
		}

		if (g_unmap && !bdev->thin_provisioning) {
			printf("Skipping %s because it does not support unmap\n", bdev->name);
			bdev_entry = bdev_entry->next;
			continue;
		}

		target = malloc(sizeof(struct io_target));
		if (!target) {
			fprintf(stderr, "Unable to allocate memory for new target.\n");
			/* Return immediately because all mallocs will presumably fail after this */
			return;
		}

		target->bdev = bdev;
		/* Mapping each target to lcore */
		index = g_target_count % spdk_app_get_core_count();
		target->next = head[index];
		target->lcore = index;
		target->io_completed = 0;
		target->current_queue_depth = 0;
		target->offset_in_ios = 0;
		/* Capacity of the device measured in g_io_size units. */
		target->size_in_ios = (bdev->blockcnt * bdev->blocklen) /
				      g_io_size;
		if (bdev->need_aligned_buffer && g_min_alignment < bdev->blocklen) {
			/* Bump buffer alignment for devices needing block-aligned buffers. */
			g_min_alignment = bdev->blocklen;
		}

		target->is_draining = false;
		rte_timer_init(&target->run_timer);
		rte_timer_init(&target->reset_timer);

		head[index] = target;
		g_target_count++;

		bdev_entry = bdev_entry->next;
	}
}
/*
 * Event handler run on the master lcore when one target finishes
 * draining.  When the last target reports in, stop the optional
 * real-time statistics timer and shut the application down.
 */
static void
end_run(spdk_event_t event)
{
	g_target_count--;
	if (g_target_count != 0) {
		return;
	}

	if (g_show_performance_real_time) {
		rte_timer_stop_sync(&g_perf_timer);
	}
	spdk_app_stop(0);
}
struct rte_mempool *task_pool;
/*
 * Completion callback for read and write I/O.  For verifying workloads
 * (verify/reset/unmap), compare the data read back against the expected
 * pattern stored in task->buf.  Recycles the task and either submits a
 * replacement I/O or, when draining and this was the last outstanding
 * I/O, signals end_run on the master lcore.
 */
static void
bdevperf_complete(spdk_event_t event)
{
	struct io_target *target;
	struct bdevperf_task *task = spdk_event_get_arg1(event);
	struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event);
	spdk_event_t complete;

	if (bdev_io->status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		g_run_failed = true;
	} else if (g_verify || g_reset || g_unmap) {
		/* Read-back verification against the written pattern. */
		if (memcmp(task->buf, bdev_io->u.read.buf, g_io_size) != 0) {
			printf("Buffer mismatch! Disk Offset: %lu\n", bdev_io->u.read.offset);
			g_run_failed = true;
		}
	}

	target = task->target;
	target->current_queue_depth--;
	target->io_completed++;

	bdev_io->caller_ctx = NULL;
	rte_mempool_put(task_pool, task);

	spdk_bdev_free_io(bdev_io);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!target->is_draining) {
		bdevperf_submit_single(target);
	} else if (target->current_queue_depth == 0) {
		/* Last outstanding I/O on this target; notify the master lcore. */
		complete = spdk_event_allocate(rte_get_master_lcore(), end_run, NULL, NULL, NULL);
		spdk_event_call(complete);
	}
}
/*
 * Completion callback for an unmap issued by bdevperf_verify_write_complete().
 * Zeroes the expected-data buffer (unmapped blocks should read back as
 * zeroes) and reads the unmapped range back in for verification.
 */
static void
bdevperf_unmap_complete(spdk_event_t event)
{
	struct io_target *target;
	struct bdevperf_task *task = spdk_event_get_arg1(event);
	struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event);

	target = task->target;

	/* Set the expected buffer to 0. */
	memset(task->buf, 0, g_io_size);

	/* Read the data back in */
	/* The unmap descriptor stores lba/block_count big-endian (SCSI format),
	 * hence the be64toh/be32toh conversions back to host byte order. */
	spdk_bdev_read(target->bdev, NULL,
		       be32toh(bdev_io->u.unmap.unmap_bdesc->block_count) * target->bdev->blocklen,
		       be64toh(bdev_io->u.unmap.unmap_bdesc->lba) * target->bdev->blocklen,
		       bdevperf_complete, task);

	/* The descriptor was allocated in bdevperf_verify_write_complete(). */
	free(bdev_io->u.unmap.unmap_bdesc);
	spdk_bdev_free_io(bdev_io);
}
/*
 * Completion callback for the pattern write of a verifying workload.
 * For -w unmap, unmap the just-written range (verification continues in
 * bdevperf_unmap_complete); otherwise read the range back so
 * bdevperf_complete can compare it against the pattern.
 */
static void
bdevperf_verify_write_complete(spdk_event_t event)
{
	struct io_target *target;
	struct bdevperf_task *task = spdk_event_get_arg1(event);
	struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event);

	target = task->target;

	if (g_unmap) {
		/* Unmap the data */
		struct spdk_scsi_unmap_bdesc *bdesc = calloc(1, sizeof(*bdesc));
		if (bdesc == NULL) {
			fprintf(stderr, "memory allocation failure\n");
			exit(1);
		}
		/* SCSI unmap descriptors are big-endian; convert from host order. */
		bdesc->lba = htobe64(bdev_io->u.write.offset / target->bdev->blocklen);
		bdesc->block_count = htobe32(bdev_io->u.write.len / target->bdev->blocklen);
		spdk_bdev_unmap(target->bdev, bdesc, 1, bdevperf_unmap_complete,
				task);
	} else {
		/* Read the data back in */
		spdk_bdev_read(target->bdev, NULL,
			       bdev_io->u.write.len,
			       bdev_io->u.write.offset,
			       bdevperf_complete, task);
	}

	spdk_bdev_free_io(bdev_io);
}
/*
 * Per-object constructor passed to rte_mempool_create(): allocate the
 * I/O data buffer for each task, aligned to g_min_alignment.
 *
 * Fix: the original ignored rte_malloc() failure, leaving task->buf
 * NULL and crashing later in memset()/memcmp() during the run.  Fail
 * fast with a clear message instead.
 */
static void
task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
{
	struct bdevperf_task *task = __task;

	task->buf = rte_malloc(NULL, g_io_size, g_min_alignment);
	if (task->buf == NULL) {
		fprintf(stderr, "Unable to allocate buffer for task %u\n", id);
		abort();
	}
}
static __thread unsigned int seed = 0;
/*
 * Allocate a task from the pool and submit one I/O to the target.
 * Verifying workloads (verify/reset/unmap) write a random byte pattern
 * and chain into the read-back path via bdevperf_verify_write_complete;
 * otherwise read vs. write is chosen according to g_rw_percentage.
 * The I/O offset is random for rand* workloads, sequential otherwise.
 */
static void
bdevperf_submit_single(struct io_target *target)
{
	struct spdk_bdev *bdev;
	struct bdevperf_task *task = NULL;
	uint64_t offset_in_ios;
	void *rbuf;

	bdev = target->bdev;

	if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) {
		printf("Task pool allocation failed\n");
		abort();
	}

	task->target = target;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % target->size_in_ios;
	} else {
		/* Sequential: advance and wrap at the end of the device. */
		offset_in_ios = target->offset_in_ios++;
		if (target->offset_in_ios == target->size_in_ios) {
			target->offset_in_ios = 0;
		}
	}

	if (g_verify || g_reset || g_unmap) {
		/* Fill the buffer with a random byte value to verify on read-back. */
		memset(task->buf, rand_r(&seed) % 256, g_io_size);
		task->iov.iov_base = task->buf;
		task->iov.iov_len = g_io_size;
		spdk_bdev_writev(bdev, &task->iov, 1, g_io_size,
				 offset_in_ios * g_io_size,
				 bdevperf_verify_write_complete, task);
	} else if ((g_rw_percentage == 100) ||
		   (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
		/* Zero-copy read passes NULL so the bdev layer supplies the buffer. */
		rbuf = g_zcopy ? NULL : task->buf;
		spdk_bdev_read(bdev, rbuf, g_io_size,
			       offset_in_ios * g_io_size,
			       bdevperf_complete, task);
	} else {
		task->iov.iov_base = task->buf;
		task->iov.iov_len = g_io_size;
		spdk_bdev_writev(bdev, &task->iov, 1, g_io_size,
				 offset_in_ios * g_io_size,
				 bdevperf_complete, task);
	}

	target->current_queue_depth++;
}
/* Prime a target with queue_depth outstanding I/Os. */
static void
bdevperf_submit_io(struct io_target *target, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		bdevperf_submit_single(target);
	}
}
/*
 * run_timer callback: the test duration has elapsed for this target.
 * Stop the periodic reset timer (if -w reset) and mark the target as
 * draining so completions stop resubmitting new I/O.
 */
static void
end_target(struct rte_timer *timer, void *arg)
{
	struct io_target *target = arg;

	if (g_reset) {
		rte_timer_stop_sync(&target->reset_timer);
	}

	target->is_draining = true;
}
static void reset_target(struct rte_timer *timer, void *arg);
/*
 * Completion callback for a blockdev reset issued by reset_target().
 * Records failure, returns the task to the pool, and re-arms the reset
 * timer to fire again in 10 seconds.
 *
 * NOTE(review): unlike the other completion callbacks, this does not
 * call spdk_bdev_free_io(bdev_io) — confirm whether the reset path
 * frees the I/O elsewhere or this leaks one bdev_io per reset.
 */
static void
reset_cb(spdk_event_t event)
{
	struct spdk_bdev_io *bdev_io = spdk_event_get_arg2(event);
	int status = bdev_io->status;
	struct bdevperf_task *task = bdev_io->caller_ctx;
	struct io_target *target = task->target;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		printf("Reset blockdev=%s failed\n", target->bdev->name);
		g_run_failed = true;
	}

	rte_mempool_put(task_pool, task);

	/* Schedule the next reset 10 seconds from now on this target's lcore. */
	rte_timer_reset(&target->reset_timer, rte_get_timer_hz() * 10, SINGLE,
			target->lcore, reset_target, target);
}
/*
 * reset_timer callback: issue a soft reset to the target's blockdev.
 * The completion (reset_cb) re-arms the timer for the next cycle.
 *
 * Fix: the original ignored the rte_mempool_get() return value, so an
 * exhausted pool left task NULL and dereferencing task->target crashed.
 * Check the result the same way bdevperf_submit_single() does.
 */
static void
reset_target(struct rte_timer *timer, void *arg)
{
	struct io_target *target = arg;
	struct bdevperf_task *task = NULL;

	if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) {
		printf("Task pool allocation failed\n");
		abort();
	}

	task->target = target;

	/* Do reset. */
	spdk_bdev_reset(target->bdev, SPDK_BDEV_RESET_SOFT,
			reset_cb, task);
}
/*
 * Event handler run on each participating lcore: start the run/reset
 * timers and submit the initial batch of I/O for every target assigned
 * to this core.  The event's arg1 is the head of this lcore's target list.
 */
static void
bdevperf_submit_on_core(spdk_event_t event)
{
	struct io_target *target = spdk_event_get_arg1(event);

	/* Submit initial I/O for each block device. Each time one
	 * completes, another will be submitted. */
	while (target != NULL) {
		/* Start a timer to stop this I/O chain when the run is over */
		rte_timer_reset(&target->run_timer, rte_get_timer_hz() * g_time_in_sec, SINGLE,
				target->lcore, end_target, target);
		if (g_reset) {
			/* Kick off periodic resets every 10 seconds for -w reset. */
			rte_timer_reset(&target->reset_timer, rte_get_timer_hz() * 10, SINGLE,
					target->lcore, reset_target, target);
		}
		bdevperf_submit_io(target, g_queue_depth);
		target = target->next;
	}
}
/*
 * Print command-line usage to stdout.
 *
 * Fix: the -w pattern list omitted "unmap", although main() accepts and
 * validates it (and the error path already lists it).
 */
static void usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-c configuration file]\n");
	printf("\t[-m core mask for distributing I/O submission/completion work\n");
	printf("\t\t(default: 0x1 - use core 0 only)]\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-S Show performance result in real time]\n");
}
/*
 * Print per-target and aggregate throughput.  io_time is the elapsed
 * time in seconds used as the divisor (1 for the per-second real-time
 * dump, g_time_in_sec for the final summary).
 */
static void
performance_dump(int io_time)
{
	int index;
	unsigned lcore_id;
	float io_per_second, mb_per_second;
	float total_io_per_second, total_mb_per_second;
	struct io_target *target;

	total_io_per_second = 0;
	total_mb_per_second = 0;
	for (index = 0; index < spdk_app_get_core_count(); index++) {
		target = head[index];
		if (target != NULL) {
			lcore_id = target->lcore;
			printf("\r Logical core: %d\n", lcore_id);
		}
		/* Walk every target assigned to this lcore. */
		while (target != NULL) {
			io_per_second = (float)target->io_completed /
					io_time;
			mb_per_second = io_per_second * g_io_size /
					(1024 * 1024);
			printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
			       target->bdev->name, io_per_second,
			       mb_per_second);
			total_io_per_second += io_per_second;
			total_mb_per_second += mb_per_second;
			target = target->next;
		}
	}

	printf("\r =====================================================\n");
	printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
	       "Total", total_io_per_second, total_mb_per_second);
	fflush(stdout);
}
/*
 * Periodic (1 Hz) timer callback for -S: dump throughput over the last
 * second (io_time of 1).
 */
static void
performance_statistics_thread(struct rte_timer *timer, void *arg)
{
	performance_dump(1);
}
/*
 * spdk_app_start() entry point: optionally arm the real-time statistics
 * timer on the master lcore, then dispatch bdevperf_submit_on_core to
 * every core in the app core mask that has targets assigned.
 */
static void
bdevperf_run(spdk_event_t evt)
{
	int i;
	struct io_target *target;
	spdk_event_t event;

	printf("Running I/O for %d seconds...\n", g_time_in_sec);
	fflush(stdout);

	/* Start a timer to dump performance numbers */
	if (g_show_performance_real_time) {
		rte_timer_init(&g_perf_timer);
		rte_timer_reset(&g_perf_timer, rte_get_timer_hz(), PERIODICAL,
				rte_get_master_lcore(), performance_statistics_thread, NULL);
	}

	/* Send events to start all I/O */
	RTE_LCORE_FOREACH(i) {
		/* Only cores enabled in the app core mask participate. */
		if (spdk_app_get_core_mask() & (1ULL << i)) {
			target = head[i];
			if (target != NULL) {
				event = spdk_event_allocate(target->lcore, bdevperf_submit_on_core,
							    target, NULL, NULL);
				spdk_event_call(event);
			}
		}
	}
}
/*
 * bdevperf entry point: parse and validate options, derive the workload
 * flags (g_rw_percentage, g_verify/g_reset/g_unmap, g_is_random),
 * initialize the app framework, build targets, create the task pool,
 * and run the workload.  Returns 0 on success, 1 on setup failure.
 *
 * Fix: the original never checked the rte_mempool_create() result; a
 * NULL task_pool would crash inside rte_mempool_get() during the run.
 */
int
main(int argc, char **argv)
{
	const char *config_file;
	const char *core_mask;
	const char *workload_type;
	int op;
	bool mix_specified;

	/* default value */
	config_file = NULL;
	g_queue_depth = 0;
	g_io_size = 0;
	workload_type = NULL;
	g_time_in_sec = 0;
	mix_specified = false;
	core_mask = NULL;

	while ((op = getopt(argc, argv, "c:m:q:s:t:w:M:S")) != -1) {
		switch (op) {
		case 'c':
			config_file = optarg;
			break;
		case 'm':
			core_mask = optarg;
			break;
		case 'q':
			g_queue_depth = atoi(optarg);
			break;
		case 's':
			g_io_size = atoi(optarg);
			break;
		case 't':
			g_time_in_sec = atoi(optarg);
			break;
		case 'w':
			workload_type = optarg;
			break;
		case 'M':
			g_rw_percentage = atoi(optarg);
			mix_specified = true;
			break;
		case 'S':
			g_show_performance_real_time = 1;
			break;
		default:
			usage(argv[0]);
			exit(1);
		}
	}

	/* -c, -q, -s, -w and -t are all required. */
	if (!config_file) {
		usage(argv[0]);
		exit(1);
	}
	if (!g_queue_depth) {
		usage(argv[0]);
		exit(1);
	}
	if (!g_io_size) {
		usage(argv[0]);
		exit(1);
	}
	if (!workload_type) {
		usage(argv[0]);
		exit(1);
	}
	if (!g_time_in_sec) {
		usage(argv[0]);
		exit(1);
	}

	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw") &&
	    strcmp(workload_type, "verify") &&
	    strcmp(workload_type, "reset") &&
	    strcmp(workload_type, "unmap")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)\n");
		exit(1);
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_rw_percentage = 0;
	}

	if (!strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		/* Verifying workloads alternate writes and read-backs. */
		g_rw_percentage = 50;
		if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
			fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n",
				SPDK_BDEV_LARGE_RBUF_MAX_SIZE, g_io_size);
			exit(1);
		}
		if (core_mask) {
			fprintf(stderr, "Ignoring -m option. Verify can only run with a single core.\n");
			core_mask = NULL;
		}
		g_verify = true;
		if (!strcmp(workload_type, "reset")) {
			g_reset = true;
		}
		if (!strcmp(workload_type, "unmap")) {
			g_unmap = true;
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite") ||
	    !strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use -M option"
				" only when using rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be specified to value from 0 to 100 "
				"for rw or randrw.\n");
			exit(1);
		}
	}

	/* rand* workloads pick random offsets; everything else is sequential. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
		fprintf(stdout, "I/O size of %d is greather than zero copy threshold (%d).\n",
			g_io_size, SPDK_BDEV_LARGE_RBUF_MAX_SIZE);
		fprintf(stdout, "Zero copy mechanism will not be used.\n");
		g_zcopy = false;
	}

	optind = 1;  /*reset the optind */

	rte_set_log_level(RTE_LOG_ERR);

	blockdev_heads_init();
	bdevtest_init(config_file, core_mask);

	bdevperf_construct_targets();

	if (g_bdevs == NULL) {
		printf("No blockdevs available.\n");
		return 1;
	}

	task_pool = rte_mempool_create("task_pool", 4096 * spdk_app_get_core_count(),
				       sizeof(struct bdevperf_task),
				       64, 0, NULL, NULL, task_ctor, NULL,
				       SOCKET_ID_ANY, 0);
	if (task_pool == NULL) {
		fprintf(stderr, "Unable to create task pool.\n");
		return 1;
	}

	spdk_app_start(bdevperf_run, NULL, NULL);

	performance_dump(g_time_in_sec);
	spdk_app_fini();
	printf("done.\n");
	return 0;
}

42
test/lib/bdev/blockdev.sh Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Block device layer test driver: run the bdevio bounds tests, then
# several bdevperf workloads (verify, large sequential read, and in
# nightly runs reset and unmap) against the test bdev configuration.
#
# Fix: removed a duplicated "testdir=$(readlink -f $(dirname $0))"
# assignment that appeared twice in the original.

set -e

testdir=$(readlink -f $(dirname $0))
rootdir=$testdir/../../..
source $rootdir/scripts/autotest_common.sh

timing_enter blockdev

timing_enter bounds
$testdir/bdevio/bdevio $testdir/bdev.conf
process_core
timing_exit bounds

timing_enter verify
$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 32 -s 4096 -w verify -t 5
process_core
timing_exit verify

# Use size 192KB which both exceeds typical 128KB max NVMe I/O
# size and will cross 128KB Intel DC P3700 stripe boundaries.
timing_enter perf
$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 128 -w read -s 196608 -t 5
process_core
timing_exit perf

if [ $RUN_NIGHTLY -eq 1 ]; then
	timing_enter reset
	$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 16 -w reset -s 4096 -t 60
	process_core
	timing_exit reset

	timing_enter unmap
	$testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 1 -w unmap -s 4096 -t 60
	process_core
	timing_exit unmap
fi

timing_exit blockdev

95
test/lib/bdev/common.c Normal file
View File

@ -0,0 +1,95 @@
/*-
* BSD LICENSE
*
* Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* This file is included in the bdev test tools, not compiled separately. */
#include "spdk/event.h"
/* Singly linked list node tracking every registered blockdev. */
struct blockdev_entry {
	struct spdk_bdev	*bdev;
	struct blockdev_entry	*next;
};

/* Head of the registered-blockdev list; newest entries are prepended. */
struct blockdev_entry *g_bdevs = NULL;
/*
 * Register a blockdev in the test tools' global list (test stand-in for
 * the bdev DB).  Returns 0 on success or -ENOMEM on allocation failure.
 */
int
spdk_bdev_db_add(struct spdk_bdev *bdev)
{
	struct blockdev_entry *entry;

	entry = calloc(1, sizeof(*entry));
	if (entry == NULL) {
		return -ENOMEM;
	}

	entry->bdev = bdev;

	/* Push onto the front of the list. */
	entry->next = g_bdevs;
	g_bdevs = entry;

	return 0;
}
/*
 * Test stub: the test tools never remove blockdevs, so deletion is a
 * deliberate no-op that always reports success.
 */
int
spdk_bdev_db_delete(struct spdk_bdev *bdev)
{
	/* Deleting is not important */
	return 0;
}
/*
 * Look up a registered blockdev by name.  Returns the matching
 * spdk_bdev, or NULL if no blockdev with that name is registered.
 */
struct spdk_bdev *
spdk_bdev_db_get_by_name(const char *bdev_name)
{
	struct blockdev_entry *entry;

	for (entry = g_bdevs; entry != NULL; entry = entry->next) {
		if (strcmp(entry->bdev->name, bdev_name) == 0) {
			return entry->bdev;
		}
	}

	return NULL;
}
/*
 * Initialize the SPDK app framework for a bdev test tool with the given
 * configuration file and reactor CPU mask.
 * NOTE(review): spdk_app_init()'s result is not checked here — confirm
 * whether it can fail non-fatally in this SPDK version.
 */
static void
bdevtest_init(const char *config_file, const char *cpumask)
{
	struct spdk_app_opts opts;

	spdk_app_opts_init(&opts);
	opts.name = "bdevtest";
	opts.config_file = config_file;
	opts.reactor_mask = cpumask;

	spdk_app_init(&opts);
}