Spdk/lib/ftl/ftl_init.c
Wojciech Malikowski 6557c2bf82 lib/ftl: IO retry queue associated with io channel
Process IO retries on the IO channel poller. This patch
replaces the retry queue associated with the device with a
queue associated with the IO channel. It also puts write IO
requests on the retry queue when the write buffer is full,
instead of sending a thread message.

Change-Id: Iec9c9a4e858c90f1fe344bb298ff13ed0e13397f
Signed-off-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/543
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2020-02-20 09:51:28 +00:00
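For context, a minimal sketch of the pattern this change describes, using the ftl_io_channel retry_queue and poller that this file sets up; the ftl_wbuf_full() helper and the ioch_entry link field are assumed names for illustration only:

/* Sketch only - not part of ftl_init.c. When the write buffer is full, the
 * write is parked on the submitting channel's retry queue instead of being
 * re-sent as a thread message:
 */
static void
ftl_submit_write_sketch(struct ftl_io_channel *ioch, struct ftl_io *io)
{
	if (ftl_wbuf_full(ioch->dev)) {		/* assumed helper */
		TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
		return;
	}
	/* ... acquire write buffer entries and submit the IO ... */
}

/* The per-channel poller registered below (ftl_io_channel_poll) then
 * re-drives anything left on the queue:
 */
static int
ftl_io_channel_poll_sketch(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct ftl_io *io;

	while (!TAILQ_EMPTY(&ioch->retry_queue)) {
		io = TAILQ_FIRST(&ioch->retry_queue);
		TAILQ_REMOVE(&ioch->retry_queue, io, ioch_entry);
		/* ... resubmit io ... */
	}

	return 0;
}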

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/bdev_zone.h"
#include "spdk/bdev_module.h"
#include "ftl_core.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_rwb.h"
#include "ftl_band.h"
#include "ftl_debug.h"
#define FTL_CORE_RING_SIZE 4096
#define FTL_INIT_TIMEOUT 30
#define FTL_NSID 1
#define FTL_ZONE_INFO_COUNT 64
/* Dummy bdev module used to claim bdevs. */
static struct spdk_bdev_module g_ftl_bdev_module = {
.name = "ftl_lib",
};
typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int);
struct ftl_dev_init_ctx {
/* Owner */
struct spdk_ftl_dev *dev;
/* Initial arguments */
struct spdk_ftl_dev_init_opts opts;
/* IO channel for retrieving zone info */
struct spdk_io_channel *ioch;
/* Buffer for reading zone info */
struct spdk_bdev_zone_info info[FTL_ZONE_INFO_COUNT];
/* Zone currently being read */
size_t zone_id;
/* User's callback */
spdk_ftl_init_fn cb_fn;
/* Callback's argument */
void *cb_arg;
/* Thread to call the callback on */
struct spdk_thread *thread;
/* Poller to check if the device has been destroyed/initialized */
struct spdk_poller *poller;
/* Status to return for halt completion callback */
int halt_complete_status;
};
static STAILQ_HEAD(, spdk_ftl_dev) g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct spdk_ftl_conf g_default_conf = {
.limits = {
/* 5 free bands / 0 % host writes */
[SPDK_FTL_LIMIT_CRIT] = { .thld = 5, .limit = 0 },
/* 10 free bands / 5 % host writes */
[SPDK_FTL_LIMIT_HIGH] = { .thld = 10, .limit = 5 },
/* 20 free bands / 40 % host writes */
[SPDK_FTL_LIMIT_LOW] = { .thld = 20, .limit = 40 },
/* 40 free bands / 100 % host writes - defrag starts running */
[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
},
/* 10 percent valid blocks */
.invalid_thld = 10,
/* 20% spare blocks */
.lba_rsvd = 20,
/* 6M write buffer */
.rwb_size = 6 * 1024 * 1024,
/* 90% band fill threshold */
.band_thld = 90,
/* Max 32 IO depth per band relocate */
.max_reloc_qdepth = 32,
/* Max 3 active band relocates */
.max_active_relocs = 3,
/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
.user_io_pool_size = 2048,
/* Number of interleaving units per ws_opt */
/* 1 for default and 3 for 3D TLC NAND */
.num_interleave_units = 1,
/*
 * If clear, FTL will return an error when restoring after a dirty shutdown.
 * If set, the last band will be padded and FTL will restore based only on
 * closed bands - this will result in lost data after recovery.
 */
.allow_open_bands = false,
.nv_cache = {
/* Maximum number of concurrent requests */
.max_request_cnt = 2048,
/* Maximum number of blocks per request */
.max_request_size = 16,
}
};
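/* Allocate the band's valid-block bitmap, initialize the LBA map lock and
 * clear the band metadata.
 */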
static int
ftl_band_init_md(struct ftl_band *band)
{
struct ftl_lba_map *lba_map = &band->lba_map;
lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
if (!lba_map->vld) {
return -ENOMEM;
}
pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
ftl_band_md_clear(band);
return 0;
}
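/* Validate the configuration: thresholds and limits within 0-100%, a non-zero
 * write buffer size aligned to the block size and a transfer size divisible
 * by the number of interleave units.
 */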
static int
ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
{
size_t i;
if (conf->invalid_thld >= 100) {
return -1;
}
if (conf->lba_rsvd >= 100) {
return -1;
}
if (conf->lba_rsvd == 0) {
return -1;
}
if (conf->rwb_size == 0) {
return -1;
}
if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
return -1;
}
if (dev->xfer_size % conf->num_interleave_units != 0) {
return -1;
}
for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
if (conf->limits[i].limit > 100) {
return -1;
}
}
return 0;
}
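/* Allocate the band array and, for each band, its zone buffer, LBA map
 * metadata and relocation bitmap. All bands start out closed on the
 * shut_bands list.
 */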
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
struct ftl_band *band, *pband;
unsigned int i;
int rc = 0;
LIST_INIT(&dev->free_bands);
LIST_INIT(&dev->shut_bands);
dev->num_free = 0;
dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
if (!dev->bands) {
return -1;
}
for (i = 0; i < ftl_get_num_bands(dev); ++i) {
band = &dev->bands[i];
band->id = i;
band->dev = dev;
band->state = FTL_BAND_STATE_CLOSED;
if (LIST_EMPTY(&dev->shut_bands)) {
LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
} else {
LIST_INSERT_AFTER(pband, band, list_entry);
}
pband = band;
CIRCLEQ_INIT(&band->zones);
band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
if (!band->zone_buf) {
SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
rc = -1;
break;
}
rc = ftl_band_init_md(band);
if (rc) {
SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
break;
}
band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
if (!band->reloc_bitmap) {
SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
rc = -1;
break;
}
}
return rc;
}
static void
ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
struct spdk_ftl_dev *dev = event_ctx;
switch (type) {
case SPDK_BDEV_EVENT_REMOVE:
assert(0);
break;
case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
ftl_get_media_events(dev);
default:
break;
}
}
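/* Open and claim the write buffer cache bdev (if configured) and verify it is
 * usable: FTL block size, separate metadata large enough to hold an LBA, DIF
 * disabled and capacity for at least two full bands plus a header block. Also
 * sets up the cache metadata pool, DMA buffer and lock.
 */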
static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
{
struct spdk_bdev *bdev;
struct spdk_ftl_conf *conf = &dev->conf;
struct ftl_nv_cache *nv_cache = &dev->nv_cache;
char pool_name[128];
int rc;
if (!bdev_name) {
return 0;
}
bdev = spdk_bdev_get_by_name(bdev_name);
if (!bdev) {
SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
return -1;
}
if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
dev, &nv_cache->bdev_desc)) {
SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
return -1;
}
if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
spdk_bdev_close(nv_cache->bdev_desc);
nv_cache->bdev_desc = NULL;
SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
return -1;
}
SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
spdk_bdev_get_name(bdev));
if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
return -1;
}
if (!spdk_bdev_is_md_separate(bdev)) {
SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
spdk_bdev_get_name(bdev));
return -1;
}
if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
return -1;
}
if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
spdk_bdev_get_name(bdev));
return -1;
}
/* The cache needs to be capable of storing at least two full bands. This requirement comes
 * from the fact that the cache works as protection against power loss, so before the data
 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
 * extra block is needed to store the header.
 */
if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
ftl_get_num_blocks_in_band(dev) * 2 + 1);
return -1;
}
rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
if (rc < 0 || rc >= 128) {
return -1;
}
nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
spdk_bdev_get_md_size(bdev) *
conf->nv_cache.max_request_size,
SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
SPDK_ENV_SOCKET_ID_ANY);
if (!nv_cache->md_pool) {
SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
return -1;
}
nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
if (!nv_cache->dma_buf) {
SPDK_ERRLOG("Memory allocation failure\n");
return -1;
}
if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
SPDK_ERRLOG("Failed to initialize cache lock\n");
return -1;
}
nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
nv_cache->num_available = nv_cache->num_data_blocks;
nv_cache->ready = false;
return 0;
}
void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
*conf = g_default_conf;
}
static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
struct ftl_lba_map_request *request = obj;
struct spdk_ftl_dev *dev = opaque;
request->segments = spdk_bit_array_create(spdk_divide_round_up(
ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
}
static int
ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
{
char pool_name[128];
int rc;
rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
if (rc < 0 || rc >= (int)sizeof(pool_name)) {
SPDK_ERRLOG("Failed to create media pool name\n");
return -1;
}
dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
sizeof(struct ftl_media_event),
SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
SPDK_ENV_SOCKET_ID_ANY);
if (!dev->media_events_pool) {
SPDK_ERRLOG("Failed to create media events pool\n");
return -1;
}
return 0;
}
static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
char pool_name[POOL_NAME_LEN];
int rc;
rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
if (rc < 0 || rc >= POOL_NAME_LEN) {
return -ENAMETOOLONG;
}
/* We need to reserve at least 2 buffers for the band close / open sequence
 * alone, plus additional (8) buffers for handling write errors.
 * TODO: This memory pool is utilized only by the core thread - it introduces
 * unnecessary overhead and should be replaced by a different data structure.
 */
dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
ftl_lba_map_pool_elem_size(dev),
SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
SPDK_ENV_SOCKET_ID_ANY);
if (!dev->lba_pool) {
return -ENOMEM;
}
rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
if (rc < 0 || rc >= POOL_NAME_LEN) {
return -ENAMETOOLONG;
}
dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
sizeof(struct ftl_lba_map_request),
SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
SPDK_ENV_SOCKET_ID_ANY,
ftl_lba_map_request_ctor,
dev);
if (!dev->lba_request_pool) {
return -ENOMEM;
}
return 0;
}
static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
LIST_INIT(&dev->wptr_list);
LIST_INIT(&dev->flush_list);
LIST_INIT(&dev->band_flush_list);
}
static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
struct ftl_band *band;
size_t seq = 0;
LIST_FOREACH(band, &dev->shut_bands, list_entry) {
if (band->seq > seq) {
seq = band->seq;
}
}
return seq;
}
static void
_ftl_init_bands_state(void *ctx)
{
struct ftl_band *band, *temp_band;
struct spdk_ftl_dev *dev = ctx;
dev->seq = ftl_dev_band_max_seq(dev);
LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
if (!band->lba_map.num_vld) {
ftl_band_set_state(band, FTL_BAND_STATE_FREE);
}
}
ftl_reloc_resume(dev->reloc);
/* Clear the limit applications as they're incremented incorrectly by */
/* the initialization code */
memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}
static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
struct ftl_band *band;
int cnt = 0;
LIST_FOREACH(band, &dev->shut_bands, list_entry) {
if (band->num_zones && !band->lba_map.num_vld) {
cnt++;
}
}
return cnt;
}
static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
/* TODO: Should we abort initialization or expose a read-only device */
/* if there are no free bands? */
/* If we abort initialization, should we depend on the condition that */
/* we have no free bands or should we require some minimal number of */
/* free bands? */
if (!ftl_init_num_free_bands(dev)) {
return -1;
}
spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
return 0;
}
static void
_ftl_dev_init_core_thread(void *ctx)
{
struct ftl_thread *thread = ctx;
struct spdk_ftl_dev *dev = thread->dev;
thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
if (!thread->poller) {
SPDK_ERRLOG("Unable to register poller\n");
assert(0);
}
thread->ioch = spdk_get_io_channel(dev);
}
static int
ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
struct ftl_thread *thread = &dev->core_thread;
if (!opts->core_thread) {
return -1;
}
thread->dev = dev;
thread->poller_fn = ftl_task_core;
thread->thread = opts->core_thread;
thread->period_us = 0;
spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, thread);
return 0;
}
static void
ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
{
assert(thread->poller == NULL);
spdk_put_io_channel(thread->ioch);
thread->thread = NULL;
thread->ioch = NULL;
}
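/* Allocate the logical-to-physical translation table and initialize every
 * entry to FTL_ADDR_INVALID.
 */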
static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
size_t addr_size;
uint64_t i;
if (dev->num_lbas == 0) {
SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
return -1;
}
if (dev->l2p) {
SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
return -1;
}
addr_size = dev->addr_len >= 32 ? 8 : 4;
dev->l2p = malloc(dev->num_lbas * addr_size);
if (!dev->l2p) {
SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
return -1;
}
for (i = 0; i < dev->num_lbas; ++i) {
ftl_l2p_set(dev, i, ftl_to_addr(FTL_ADDR_INVALID));
}
return 0;
}
static void
ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
{
if (!init_ctx) {
return;
}
if (init_ctx->ioch) {
spdk_put_io_channel(init_ctx->ioch);
}
free(init_ctx);
}
static void
ftl_call_init_complete_cb(void *ctx)
{
struct ftl_dev_init_ctx *init_ctx = ctx;
struct spdk_ftl_dev *dev = init_ctx->dev;
if (init_ctx->cb_fn != NULL) {
init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
}
ftl_dev_free_init_ctx(init_ctx);
}
static void
ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
pthread_mutex_lock(&g_ftl_queue_lock);
STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
pthread_mutex_unlock(&g_ftl_queue_lock);
dev->initialized = 1;
spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
}
static void
ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
{
struct ftl_dev_init_ctx *init_ctx = ctx;
if (init_ctx->cb_fn != NULL) {
init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
}
ftl_dev_free_init_ctx(init_ctx);
}
static int _spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
struct spdk_thread *thread);
static void
ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
{
if (_spdk_ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
SPDK_ERRLOG("Unable to free the device\n");
assert(0);
}
}
static void
ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
struct spdk_ftl_dev *dev = init_ctx->dev;
spdk_bdev_free_io(bdev_io);
if (spdk_unlikely(!success)) {
SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
ftl_init_fail(init_ctx);
return;
}
dev->nv_cache.ready = true;
ftl_init_complete(init_ctx);
}
static void
ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
struct spdk_ftl_dev *dev = init_ctx->dev;
struct ftl_nv_cache *nv_cache = &dev->nv_cache;
spdk_bdev_free_io(bdev_io);
if (spdk_unlikely(!success)) {
SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
ftl_init_fail(init_ctx);
return;
}
nv_cache->phase = 1;
if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
ftl_init_fail(init_ctx);
}
}
static void
_ftl_nv_cache_scrub(void *ctx)
{
struct ftl_dev_init_ctx *init_ctx = ctx;
struct spdk_ftl_dev *dev = init_ctx->dev;
int rc;
rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
spdk_strerror(-rc));
ftl_init_fail(init_ctx);
}
}
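/* Set up a freshly created device: generate a UUID, size the L2P from the
 * usable band capacity minus the reserved LBAs, initialize band state and, if
 * a non-volatile cache is attached, scrub it on the core thread.
 */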
static int
ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
struct spdk_ftl_conf *conf = &dev->conf;
size_t i;
spdk_uuid_generate(&dev->uuid);
dev->num_lbas = 0;
for (i = 0; i < ftl_get_num_bands(dev); ++i) {
dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
}
dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
if (ftl_dev_l2p_alloc(dev)) {
SPDK_ERRLOG("Unable to init l2p table\n");
return -1;
}
if (ftl_init_bands_state(dev)) {
SPDK_ERRLOG("Unable to finish the initialization\n");
return -1;
}
if (!ftl_dev_has_nv_cache(dev)) {
ftl_init_complete(init_ctx);
} else {
spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
}
return 0;
}
static void
ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
if (spdk_unlikely(status != 0)) {
SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
ftl_init_fail(init_ctx);
return;
}
ftl_init_complete(init_ctx);
}
static void
ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
struct spdk_ftl_dev *dev = init_ctx->dev;
if (status) {
SPDK_ERRLOG("Failed to restore the device from the SSD\n");
ftl_init_fail(init_ctx);
return;
}
if (ftl_init_bands_state(dev)) {
SPDK_ERRLOG("Unable to finish the initialization\n");
ftl_init_fail(init_ctx);
return;
}
if (!ftl_dev_has_nv_cache(dev)) {
ftl_init_complete(init_ctx);
return;
}
ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
}
static void
ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
if (status) {
SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
goto error;
}
/* After the metadata is read it should be possible to allocate the L2P */
if (ftl_dev_l2p_alloc(init_ctx->dev)) {
SPDK_ERRLOG("Failed to allocate the L2P\n");
goto error;
}
if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
goto error;
}
return;
error:
ftl_init_fail(init_ctx);
}
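/* Restore an existing device: adopt the UUID from the options and start
 * metadata restoration from the SSD; the remaining steps continue in the
 * restore callbacks.
 */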
static int
ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
dev->uuid = init_ctx->opts.uuid;
if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
return -1;
}
return 0;
}
static void
ftl_dev_update_bands(struct spdk_ftl_dev *dev)
{
struct ftl_band *band, *temp_band;
size_t i;
for (i = 0; i < ftl_get_num_bands(dev); ++i) {
band = &dev->bands[i];
band->tail_md_addr = ftl_band_tail_md_addr(band);
}
/* Remove band from shut_bands list to prevent further processing */
/* if all blocks on this band are bad */
LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
if (!band->num_zones) {
dev->num_bands--;
LIST_REMOVE(band, list_entry);
}
}
}
static void
ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
ftl_dev_update_bands(dev);
if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
SPDK_ERRLOG("Unable to initialize device thread\n");
ftl_init_fail(init_ctx);
return;
}
if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
if (ftl_setup_initial_state(init_ctx)) {
SPDK_ERRLOG("Failed to setup initial state of the device\n");
ftl_init_fail(init_ctx);
return;
}
} else {
if (ftl_restore_state(init_ctx)) {
SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
ftl_init_fail(init_ctx);
return;
}
}
}
static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);
static void
ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct ftl_dev_init_ctx *init_ctx = cb_arg;
struct spdk_ftl_dev *dev = init_ctx->dev;
struct ftl_band *band;
struct ftl_zone *zone;
struct ftl_addr addr;
size_t i, zones_left, num_zones;
spdk_bdev_free_io(bdev_io);
if (spdk_unlikely(!success)) {
SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
ftl_init_fail(init_ctx);
return;
}
zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
for (i = 0; i < num_zones; ++i) {
addr.offset = init_ctx->info[i].zone_id;
band = &dev->bands[ftl_addr_get_band(dev, addr)];
zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
zone->info = init_ctx->info[i];
/* TODO: add support for zone capacity less than zone size */
if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
SPDK_ERRLOG("Zone capacity is not equal zone size for "
"zone id: %"PRIu64"\n", init_ctx->zone_id);
}
if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
band->num_zones++;
CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
}
}
init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);
ftl_dev_get_zone_info(init_ctx);
}
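/* Retrieve zone information from the base bdev in batches of up to
 * FTL_ZONE_INFO_COUNT zones; once all zones have been read, continue with
 * ftl_dev_init_state().
 */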
static void
ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
size_t zones_left, num_zones;
int rc;
zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
if (zones_left == 0) {
ftl_dev_init_state(init_ctx);
return;
}
num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
init_ctx->zone_id, num_zones, init_ctx->info,
ftl_dev_get_zone_info_cb, init_ctx);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
ftl_init_fail(init_ctx);
}
}
static int
ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
{
struct spdk_ftl_dev *dev = init_ctx->dev;
init_ctx->zone_id = 0;
init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
if (!init_ctx->ioch) {
SPDK_ERRLOG("Failed to get base bdev IO channel\n");
return -1;
}
ftl_dev_get_zone_info(init_ctx);
return 0;
}
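/* IO channel constructor: create the per-channel IO pool, get the base (and
 * optional cache) bdev channels, initialize the write completion and retry
 * queues and register the channel poller.
 */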
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
struct spdk_ftl_dev *dev = io_device;
struct ftl_io_channel *ioch = ctx;
char mempool_name[32];
int rc;
rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
SPDK_ERRLOG("Failed to create IO channel pool name\n");
return -1;
}
ioch->cache_ioch = NULL;
ioch->dev = dev;
ioch->elem_size = sizeof(struct ftl_md_io);
ioch->io_pool = spdk_mempool_create(mempool_name,
dev->conf.user_io_pool_size,
ioch->elem_size,
0,
SPDK_ENV_SOCKET_ID_ANY);
if (!ioch->io_pool) {
SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
return -1;
}
ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
if (!ioch->base_ioch) {
SPDK_ERRLOG("Failed to create base bdev IO channel\n");
goto fail_ioch;
}
if (ftl_dev_has_nv_cache(dev)) {
ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
if (!ioch->cache_ioch) {
SPDK_ERRLOG("Failed to create cache IO channel\n");
goto fail_cache;
}
}
TAILQ_INIT(&ioch->write_cmpl_queue);
TAILQ_INIT(&ioch->retry_queue);
ioch->poller = spdk_poller_register(ftl_io_channel_poll, ioch, 0);
if (!ioch->poller) {
SPDK_ERRLOG("Failed to register IO channel poller\n");
goto fail_poller;
}
return 0;
fail_poller:
if (ioch->cache_ioch) {
spdk_put_io_channel(ioch->cache_ioch);
}
fail_cache:
spdk_put_io_channel(ioch->base_ioch);
fail_ioch:
spdk_mempool_free(ioch->io_pool);
return -1;
}
static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
struct ftl_io_channel *ioch = ctx;
spdk_poller_unregister(&ioch->poller);
spdk_mempool_free(ioch->io_pool);
spdk_put_io_channel(ioch->base_ioch);
if (ioch->cache_ioch) {
spdk_put_io_channel(ioch->cache_ioch);
}
}
static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
sizeof(struct ftl_io_channel),
NULL);
return 0;
}
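/* Open and claim the zoned base bdev, validate its block size, geometry and
 * append support, and derive the transfer size, band count and address length
 * from it.
 */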
static int
ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
{
uint32_t block_size;
uint64_t num_blocks;
struct spdk_bdev *bdev;
bdev = spdk_bdev_get_by_name(bdev_name);
if (!bdev) {
SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
return -1;
}
if (!spdk_bdev_is_zoned(bdev)) {
SPDK_ERRLOG("Bdev dosen't support zone capabilities: %s\n",
spdk_bdev_get_name(bdev));
return -1;
}
if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
dev, &dev->base_bdev_desc)) {
SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
return -1;
}
if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
spdk_bdev_close(dev->base_bdev_desc);
dev->base_bdev_desc = NULL;
SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
return -1;
}
dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
dev->md_size = spdk_bdev_get_md_size(bdev);
block_size = spdk_bdev_get_block_size(bdev);
if (block_size != FTL_BLOCK_SIZE) {
SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
return -1;
}
num_blocks = spdk_bdev_get_num_blocks(bdev);
if (num_blocks % ftl_get_num_punits(dev)) {
SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be multiple "
"of optimal number of zones.\n");
return -1;
}
if (ftl_is_append_supported(dev) &&
!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
SPDK_ERRLOG("Bdev dosen't support append: %s\n",
spdk_bdev_get_name(bdev));
return -1;
}
dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
dev->addr_len = spdk_u64log2(num_blocks) + 1;
return 0;
}
static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
struct ftl_lba_map_request *request = obj;
spdk_bit_array_free(&request->segments);
}
static void
ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
{
if (!bdev_desc) {
return;
}
spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
spdk_bdev_close(bdev_desc);
}
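/* Synchronously release all device resources: remove the device from the
 * global list and free bands, pools, write buffer, reloc state, bdev claims
 * and finally the device itself.
 */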
static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
struct spdk_ftl_dev *iter;
size_t i;
if (!dev) {
return;
}
pthread_mutex_lock(&g_ftl_queue_lock);
STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
if (iter == dev) {
STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
break;
}
}
pthread_mutex_unlock(&g_ftl_queue_lock);
assert(LIST_EMPTY(&dev->wptr_list));
assert(ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) == 0);
assert(ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER) == 0);
ftl_dev_dump_bands(dev);
ftl_dev_dump_stats(dev);
spdk_io_device_unregister(dev, NULL);
if (dev->core_thread.thread) {
ftl_dev_free_thread(dev, &dev->core_thread);
}
if (dev->bands) {
for (i = 0; i < ftl_get_num_bands(dev); ++i) {
free(dev->bands[i].zone_buf);
spdk_bit_array_free(&dev->bands[i].lba_map.vld);
spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
}
}
spdk_dma_free(dev->nv_cache.dma_buf);
spdk_mempool_free(dev->lba_pool);
spdk_mempool_free(dev->nv_cache.md_pool);
spdk_mempool_free(dev->media_events_pool);
if (dev->lba_request_pool) {
spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
}
spdk_mempool_free(dev->lba_request_pool);
ftl_rwb_free(dev->rwb);
ftl_reloc_free(dev->reloc);
ftl_release_bdev(dev->nv_cache.bdev_desc);
ftl_release_bdev(dev->base_bdev_desc);
free(dev->name);
free(dev->bands);
free(dev->l2p);
free(dev);
}
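/* Public entry point: allocate the device, perform the synchronous part of
 * initialization (pools, bands, non-volatile cache, write buffer, reloc, IO
 * channels), then continue asynchronously with zone info retrieval. The user
 * callback is invoked once initialization completes or fails.
 */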
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
struct spdk_ftl_dev *dev;
struct spdk_ftl_dev_init_opts opts = *_opts;
struct ftl_dev_init_ctx *init_ctx = NULL;
int rc = -ENOMEM;
dev = calloc(1, sizeof(*dev));
if (!dev) {
return -ENOMEM;
}
init_ctx = calloc(1, sizeof(*init_ctx));
if (!init_ctx) {
goto fail_sync;
}
init_ctx->dev = dev;
init_ctx->opts = *_opts;
init_ctx->cb_fn = cb_fn;
init_ctx->cb_arg = cb_arg;
init_ctx->thread = spdk_get_thread();
if (!opts.conf) {
opts.conf = &g_default_conf;
}
if (!opts.base_bdev) {
SPDK_ERRLOG("Lack of underlying device in configuration\n");
rc = -EINVAL;
goto fail_sync;
}
dev->conf = *opts.conf;
dev->limit = SPDK_FTL_LIMIT_MAX;
dev->name = strdup(opts.name);
if (!dev->name) {
SPDK_ERRLOG("Unable to set device name\n");
goto fail_sync;
}
if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
SPDK_ERRLOG("Unsupported underlying device\n");
goto fail_sync;
}
/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
/* so we don't have to clean up in each of the init functions. */
if (ftl_check_conf(dev, opts.conf)) {
SPDK_ERRLOG("Invalid device configuration\n");
goto fail_sync;
}
if (ftl_init_lba_map_pools(dev)) {
SPDK_ERRLOG("Unable to init LBA map pools\n");
goto fail_sync;
}
if (ftl_init_media_events_pool(dev)) {
SPDK_ERRLOG("Unable to init media events pools\n");
goto fail_sync;
}
ftl_init_wptr_list(dev);
if (ftl_dev_init_bands(dev)) {
SPDK_ERRLOG("Unable to initialize band array\n");
goto fail_sync;
}
if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
SPDK_ERRLOG("Unable to initialize persistent cache\n");
goto fail_sync;
}
dev->rwb = ftl_rwb_init(&dev->conf, dev->xfer_size, dev->md_size, ftl_get_num_punits(dev));
if (!dev->rwb) {
SPDK_ERRLOG("Unable to initialize rwb structures\n");
goto fail_sync;
}
dev->reloc = ftl_reloc_init(dev);
if (!dev->reloc) {
SPDK_ERRLOG("Unable to initialize reloc structures\n");
goto fail_sync;
}
if (ftl_dev_init_io_channel(dev)) {
SPDK_ERRLOG("Unable to initialize IO channels\n");
goto fail_sync;
}
if (ftl_dev_init_zones(init_ctx)) {
SPDK_ERRLOG("Failed to initialize zones\n");
goto fail_async;
}
return 0;
fail_sync:
ftl_dev_free_sync(dev);
ftl_dev_free_init_ctx(init_ctx);
return rc;
fail_async:
ftl_init_fail(init_ctx);
return 0;
}
static void
_ftl_halt_defrag(void *arg)
{
ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
}
static void
ftl_halt_complete_cb(void *ctx)
{
struct ftl_dev_init_ctx *fini_ctx = ctx;
ftl_dev_free_sync(fini_ctx->dev);
if (fini_ctx->cb_fn != NULL) {
fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
}
ftl_dev_free_init_ctx(fini_ctx);
}
static void
ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct ftl_dev_init_ctx *fini_ctx = cb_arg;
int rc = 0;
spdk_bdev_free_io(bdev_io);
if (spdk_unlikely(!success)) {
SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
rc = -EIO;
}
fini_ctx->halt_complete_status = rc;
spdk_thread_send_msg(fini_ctx->thread, ftl_halt_complete_cb, fini_ctx);
}
static int
ftl_halt_poller(void *ctx)
{
struct ftl_dev_init_ctx *fini_ctx = ctx;
struct spdk_ftl_dev *dev = fini_ctx->dev;
if (!dev->core_thread.poller) {
spdk_poller_unregister(&fini_ctx->poller);
if (ftl_dev_has_nv_cache(dev)) {
ftl_nv_cache_write_header(&dev->nv_cache, true,
ftl_nv_cache_header_fini_cb, fini_ctx);
} else {
fini_ctx->halt_complete_status = 0;
spdk_thread_send_msg(fini_ctx->thread, ftl_halt_complete_cb, fini_ctx);
}
}
return 0;
}
static void
ftl_add_halt_poller(void *ctx)
{
struct ftl_dev_init_ctx *fini_ctx = ctx;
struct spdk_ftl_dev *dev = fini_ctx->dev;
dev->halt = 1;
_ftl_halt_defrag(dev);
assert(!fini_ctx->poller);
fini_ctx->poller = spdk_poller_register(ftl_halt_poller, fini_ctx, 100);
}
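/* Begin an asynchronous shutdown: halt defrag on the core thread and poll
 * until the core poller is unregistered, then write the non-volatile cache
 * header (if present) and complete the callback on the requested thread.
 */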
static int
_spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
struct spdk_thread *thread)
{
struct ftl_dev_init_ctx *fini_ctx;
if (dev->halt_started) {
return -EBUSY;
}
dev->halt_started = true;
fini_ctx = calloc(1, sizeof(*fini_ctx));
if (!fini_ctx) {
return -ENOMEM;
}
fini_ctx->dev = dev;
fini_ctx->cb_fn = cb_fn;
fini_ctx->cb_arg = cb_arg;
fini_ctx->thread = thread;
ftl_rwb_disable_interleaving(dev->rwb);
spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
return 0;
}
int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
return _spdk_ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
}
SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)