spdk/lib/ftl/mngt/ftl_mngt_recovery.c
Kozlowski Mateusz bcdedd1a2b FTL: Add recovery iterations
In order to fit within the maximum memory usage limit, recovery needs
to be split into multiple parts. During each iteration, part of the L2P needs
to be read, modified as necessary, and saved back to the cache.
This patch introduces the load/save steps, initialization of the seq_id array,
and valid map recovery. The actual L2P recovery is done in a follow-up patch.

Signed-off-by: Kozlowski Mateusz <mateusz.kozlowski@intel.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: I8ceadc5ef280542a173d83b932a983d5d86604a1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13371
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation.
* All rights reserved.
*/
#include "spdk/bdev_module.h"
#include "ftl_nv_cache.h"
#include "ftl_core.h"
#include "ftl_utils.h"
#include "ftl_band.h"
#include "ftl_internal.h"
#include "ftl_l2p_cache.h"
#include "ftl_mngt.h"
#include "ftl_mngt_steps.h"
#include "utils/ftl_addr_utils.h"
struct ftl_mngt_recovery_ctx {
/* Main recovery FTL management process */
struct ftl_mngt_process *main;
int status;
TAILQ_HEAD(, ftl_band) open_bands;
uint64_t open_bands_num;
struct {
struct ftl_layout_region region;
struct ftl_md *md;
uint64_t *l2p;
uint64_t *seq_id;
uint64_t count;
} l2p_snippet;
struct {
uint64_t block_limit;
uint64_t lba_first;
uint64_t lba_last;
uint32_t i;
} iter;
uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
};
static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
static const struct ftl_mngt_process_desc g_desc_recovery;
static const struct ftl_mngt_process_desc g_desc_recovery_shm;
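/* The iteration loop is finished when the snippet region has no blocks left to
 * process - recovery_iter_advance() shrinks the window to zero blocks once it
 * moves past the end of the L2P region.
 */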
static bool
recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
return 0 == ctx->l2p_snippet.region.current.blocks;
}
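/* Slide the L2P snippet window past the blocks processed in the previous
 * iteration and recalculate the LBA range [lba_first, lba_last) covered by
 * the new window.
 */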
static void
recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
struct ftl_layout_region *region, *snippet;
uint64_t first_block, last_blocks;
ctx->iter.i++;
region = &dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
snippet = &ctx->l2p_snippet.region;
/* Advance processed blocks */
snippet->current.offset += snippet->current.blocks;
snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);
first_block = snippet->current.offset - region->current.offset;
ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
last_blocks = first_block + snippet->current.blocks;
ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
if (ctx->iter.lba_last > dev->num_lbas) {
ctx->iter.lba_last = dev->num_lbas;
}
}
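/* Size the recovery iterations to the configured L2P DRAM limit and allocate a
 * single shared memory buffer that holds both the L2P snippet and its sequence
 * ID array.
 */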
static void
ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
uint64_t l2p_limit_block, seq_limit_block, md_blocks;
int md_flags;
ctx->main = mngt;
if (ftl_fast_recovery(dev)) {
/* With fast recovery from shared memory, the temporary buffers are not needed */
ftl_mngt_next_step(mngt);
return;
}
/*
* The recovery process allocates temporary buffers. To avoid exceeding the memory limit,
* free the L2P metadata buffers if they exist; they will be recreated in the L2P
* initialization phase.
*/
ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));
/* The values below are in bytes */
mem_limit = dev->conf.l2p_dram_limit * MiB;
mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
MiB) * MiB);
lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
l2p_limit = lba_limit * dev->layout.l2p.addr_size;
iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);
ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);
/* Round to block size */
ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;
seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);
FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
dev->sb->ckpt_seq_id = 0;
/* Initialize region */
ctx->l2p_snippet.region = dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
/* Limit the number of blocks in the region; this will be needed for ftl_md_set_region */
ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;
l2p_limit_block = ctx->iter.block_limit;
seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);
md_blocks = l2p_limit_block + seq_limit_block;
md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;
/* Initialize snippet of L2P metadata */
ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
&ctx->l2p_snippet.region);
if (!ctx->l2p_snippet.md) {
ftl_mngt_fail_step(mngt);
return;
}
ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);
/* Initialize the recovery iterator. It is called with blocks set to zero,
 * meaning zero blocks have been processed, so it recalculates the offsets
 * and the starting LBA back to the initial position. */
ctx->l2p_snippet.region.current.blocks = 0;
recovery_iter_advance(dev, ctx);
/* Initialize snippet of sequence IDs */
ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
(l2p_limit_block * FTL_BLOCK_SIZE));
TAILQ_INIT(&ctx->open_bands);
ftl_mngt_next_step(mngt);
}
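/* Free the temporary L2P snippet buffer created in ftl_mngt_recovery_init() */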
static void
ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
ftl_md_destroy(ctx->l2p_snippet.md, 0);
ctx->l2p_snippet.md = NULL;
ctx->l2p_snippet.seq_id = NULL;
ftl_mngt_next_step(mngt);
}
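/* Completion of a single g_desc_recovery_iteration pass - advance the iterator
 * and re-enter the "Recovery iterations" step of the main recovery process.
 */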
static void
recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
{
struct ftl_mngt_recovery_ctx *ctx = _ctx;
recovery_iter_advance(dev, ctx);
if (status) {
ftl_mngt_fail_step(ctx->main);
} else {
ftl_mngt_continue_step(ctx->main);
}
}
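/* Execute g_desc_recovery_iteration until the whole L2P region has been
 * processed. Each pass re-enters this step via recovery_iteration_cb().
 * Fast (SHM) recovery skips the iterations altogether.
 */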
static void
ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
if (ftl_fast_recovery(dev)) {
ftl_mngt_skip_step(mngt);
return;
}
if (recovery_iter_done(dev, ctx)) {
ftl_mngt_next_step(mngt);
} else {
ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
}
}
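/* Band metadata has been restored - distribute the bands onto the free, open
 * and shut lists according to their persisted state. Open bands are collected
 * for P2L recovery in a later step.
 */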
static void
restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
struct ftl_mngt_process *mngt = md->owner.cb_ctx;
struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
struct ftl_band *band;
uint64_t num_bands = ftl_get_num_bands(dev);
uint64_t i;
if (status) {
/* Band state restore failed, fail the step */
ftl_mngt_fail_step(mngt);
return;
}
for (i = 0; i < num_bands; i++) {
band = &dev->bands[i];
switch (band->md->state) {
case FTL_BAND_STATE_FREE:
ftl_band_initialize_free_state(band);
break;
case FTL_BAND_STATE_OPEN:
TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
break;
case FTL_BAND_STATE_CLOSED:
break;
default:
status = -EINVAL;
}
}
if (status) {
ftl_mngt_fail_step(mngt);
} else {
ftl_mngt_next_step(mngt);
}
}
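/* Kick off restoring the band metadata region; the bands are sorted onto their
 * lists in restore_band_state_cb() once the read completes.
 */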
static void
ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];
md->owner.cb_ctx = mngt;
md->cb = restore_band_state_cb;
ftl_md_restore(md);
}
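/* Zero the per-LBA sequence ID snippet at the start of each iteration */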
static void
ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
size_t size = sizeof(ctx->l2p_snippet.seq_id[0]) * ctx->l2p_snippet.count;
memset(ctx->l2p_snippet.seq_id, 0, size);
ftl_mngt_next_step(mngt);
}
static void
l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
struct ftl_mngt_process *mngt = md->owner.cb_ctx;
if (status) {
ftl_mngt_fail_step(mngt);
} else {
ftl_mngt_next_step(mngt);
}
}
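/* Narrow the snippet metadata object to the L2P blocks covered by this
 * iteration and read them into the temporary buffer.
 */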
static void
ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
struct ftl_md *md = ctx->l2p_snippet.md;
struct ftl_layout_region *region = &ctx->l2p_snippet.region;
FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
region->current.offset, region->current.offset + region->current.blocks,
ctx->iter.lba_first, ctx->iter.lba_last);
if (ftl_md_set_region(md, &ctx->l2p_snippet.region)) {
ftl_mngt_fail_step(mngt);
return;
}
md->owner.cb_ctx = mngt;
md->cb = l2p_cb;
ftl_md_restore(md);
}
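/* Write the L2P snippet back to its portion of the L2P region */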
static void
ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
struct ftl_md *md = ctx->l2p_snippet.md;
md->owner.cb_ctx = mngt;
md->cb = l2p_cb;
ftl_md_persist(md);
}
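/* Walk this iteration's LBA range and set the valid map bit for every mapped
 * physical address; for addresses on the base device, also bump the owning
 * band's valid block count. A bit that is already set means two LBAs map to
 * the same address, which should never happen.
 */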
static void
ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
uint64_t lba, lba_off;
ftl_addr addr;
for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
lba_off = lba - pctx->iter.lba_first;
addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);
if (addr == FTL_ADDR_INVALID) {
continue;
}
if (!ftl_addr_in_nvc(dev, addr)) {
struct ftl_band *band = ftl_band_from_addr(dev, addr);
band->p2l_map.num_valid++;
}
if (ftl_bitmap_get(dev->valid_map, addr)) {
assert(false);
ftl_mngt_fail_step(mngt);
return;
} else {
ftl_bitmap_set(dev->valid_map, addr);
}
}
ftl_mngt_next_step(mngt);
}
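/* Cache the sequence ID stored in every P2L checkpoint region so open bands
 * can later be matched to their checkpoints.
 */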
static void
p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
{
uint64_t seq_id;
int md_region, ckpt_id;
for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
}
}
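/* Restore the band's P2L map from the checkpoint whose sequence ID matches the
 * band. A band without a matching checkpoint was opened, but nothing was
 * written to it yet.
 */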
static int
p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
{
uint64_t seq_id;
int md_region, ckpt_id;
memset(band->p2l_map.band_map, -1,
FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));
for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
if (seq_id == band->md->seq) {
FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
PRIu64"\n", band->id, ckpt_id, seq_id);
return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
}
}
/* The band was open but holds no valid blocks; reset the write pointer to 0 */
ftl_band_iter_init(band);
FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
" match any P2L checkpoint\n", band->id, band->md->seq);
return 0;
}
static void
ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
p2l_ckpt_preprocess(dev, pctx);
ftl_mngt_next_step(mngt);
}
static void
ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
ftl_recover_max_seq(dev);
ftl_mngt_next_step(mngt);
}
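/* Recover the P2L map of each band left open at dirty shutdown, one band per
 * pass - the step keeps re-executing itself until the open bands list is empty.
 */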
static void
ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
struct ftl_band *band;
if (TAILQ_EMPTY(&pctx->open_bands)) {
FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
if (pctx->status) {
ftl_mngt_fail_step(mngt);
} else {
ftl_mngt_next_step(mngt);
}
return;
}
if (!ftl_mngt_get_step_ctx(mngt)) {
ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));
/* First time this step is called, initialize the open bands */
TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
if (ftl_band_alloc_p2l_map(band)) {
FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate LBA map\n");
ftl_mngt_fail_step(mngt);
return;
}
if (p2l_ckpt_restore_p2l(pctx, band)) {
FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
ftl_mngt_fail_step(mngt);
return;
}
if (!band->p2l_map.p2l_ckpt) {
band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
if (!band->p2l_map.p2l_ckpt) {
FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
ftl_mngt_fail_step(mngt);
return;
}
}
}
}
band = TAILQ_FIRST(&pctx->open_bands);
if (ftl_band_filled(band, band->md->iter.offset)) {
band->md->state = FTL_BAND_STATE_FULL;
}
/* In the next step (finalize band initialization) this band will be
 * assigned to a writer. Temporarily move it to the closed bands list;
 * the next step will then move it from that list to the writer.
 */
TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);
FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
band->id, band->md->seq, band->md->iter.offset);
ftl_mngt_continue_step(mngt);
}
static void
ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
ftl_valid_map_load_state(dev);
ftl_mngt_next_step(mngt);
}
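/* On fast recovery the L2P content survived in shared memory - run the SHM
 * subprocess to restore it instead of rebuilding it from scratch.
 */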
static void
ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
if (ftl_fast_recovery(dev)) {
ftl_mngt_call_process(mngt, &g_desc_recovery_shm);
} else {
ftl_mngt_skip_step(mngt);
}
}
/*
* During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
* recreating it all at the same time may take up too much DRAM, so it's done in multiple
* iterations. This process describes the recovery of a part of L2P in one iteration.
*/
static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
.name = "FTL recovery iteration",
.steps = {
{
.name = "Load L2P",
.action = ftl_mngt_recovery_iteration_load_l2p,
},
{
.name = "Initialize sequence IDs",
.action = ftl_mngt_recovery_iteration_init_seq_ids,
},
{
.name = "Restore valid map",
.action = ftl_mngt_recovery_iteration_restore_valid_map,
},
{
.name = "Save L2P",
.action = ftl_mngt_recovery_iteration_save_l2p,
},
{}
}
};
/*
* Loading of FTL after dirty shutdown. Recovers metadata and L2P, and decides on the number
* of recovery iterations to execute (dependent on the ratio of the L2P cache size to the
* total L2P size).
*/
static const struct ftl_mngt_process_desc g_desc_recovery = {
.name = "FTL recovery",
.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
.steps = {
{
.name = "Initialize recovery",
.action = ftl_mngt_recovery_init,
.cleanup = ftl_mngt_recovery_deinit
},
{
.name = "Recover band state",
.action = ftl_mngt_recovery_restore_band_state,
},
{
.name = "Initialize P2L checkpointing",
.action = ftl_mngt_p2l_init_ckpt,
.cleanup = ftl_mngt_p2l_deinit_ckpt
},
{
.name = "Restore P2L checkpoints",
.action = ftl_mngt_p2l_restore_ckpt
},
{
.name = "Preprocess P2L checkpoints",
.action = ftl_mngt_recovery_pre_process_p2l
},
{
.name = "Recover open bands P2L",
.action = ftl_mngt_recovery_open_bands_p2l
},
{
.name = "Recover max seq ID",
.action = ftl_mngt_recover_seq_id
},
{
.name = "Recovery iterations",
.action = ftl_mngt_recovery_run_iteration,
},
{
.name = "Deinitialize recovery",
.action = ftl_mngt_recovery_deinit
},
{
.name = "Initialize L2P",
.action = ftl_mngt_init_l2p,
.cleanup = ftl_mngt_deinit_l2p
},
{
.name = "Recover L2P from shared memory",
.action = ftl_mngt_recovery_shm_l2p,
},
{
.name = "Finalize band initialization",
.action = ftl_mngt_finalize_init_bands,
},
{
.name = "Free P2L region bufs",
.action = ftl_mngt_p2l_free_bufs,
},
{
.name = "Start core poller",
.action = ftl_mngt_start_core_poller,
.cleanup = ftl_mngt_stop_core_poller
},
{
.name = "Self test on startup",
.action = ftl_mngt_self_test
},
{
.name = "Finalize initialization",
.action = ftl_mngt_finalize_startup,
},
{}
}
};
/*
* Shared memory specific steps for dirty shutdown recovery - main task is rebuilding the state of
* L2P cache (paged in/out status, dirtiness etc. of individual pages).
*/
static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
.name = "FTL recovery from SHM",
.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
.steps = {
{
.name = "Restore L2P from SHM",
.action = ftl_mngt_restore_l2p,
},
{
.name = "Restore valid maps counters",
.action = ftl_mngt_restore_valid_counters,
},
{}
}
};
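/* Entry point of dirty shutdown recovery - executes the g_desc_recovery process */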
void
ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
ftl_mngt_call_process(mngt, &g_desc_recovery);
}