Spdk/lib/ftl/mngt/ftl_mngt_band.c
Kozlowski Mateusz c332181331 FTL: Move base device sb to LBA 0
Moving the superblock of the base device to sector 0, in order to
prevent other bdevs (e.g. GPT or blobstore) from potentially hijacking
the base device during startup (if their metadata by 'luck' manages to
find itself at sector 0 of band 0, which depending on the order of
operations could be very likely).

Signed-off-by: Kozlowski Mateusz <mateusz.kozlowski@intel.com>
Change-Id: I8a6eb3c89a229f443ef23d975a8ff0880ba65b08
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14143
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
2022-09-20 19:24:26 +00:00

430 lines
10 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation.
* All rights reserved.
*/
#include "ftl_core.h"
#include "ftl_mngt_steps.h"
#include "ftl_band.h"
#include "ftl_internal.h"
/*
 * Bind a single band to its persistent metadata.
 *
 * Creates the band's valid-block bitmap on top of the valid-map metadata buffer
 * (starting band->start_addr / 8 bytes into it and spanning one bit per block of
 * the band) and points band->md at the band's own entry (indexed by band->id) in
 * the band metadata buffer.
 *
 * Returns 0 on success, -EINVAL when the number of blocks in a band is not a
 * multiple of (ftl_bitmap_buffer_alignment * 8), or -ENOMEM when the bitmap
 * cannot be created.
 */
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_md *md_band_info = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];
	struct ftl_md *md_valid_map = dev->layout.md[FTL_LAYOUT_REGION_TYPE_VALID_MAP];
	struct ftl_band_md *md_entries = ftl_md_get_buffer(md_band_info);
	uint64_t blocks_in_band = ftl_get_num_blocks_in_band(dev);
	size_t valid_map_bytes;

	/* The bitmap slice of each band has to begin and end on a bitmap word boundary */
	if (blocks_in_band % (ftl_bitmap_buffer_alignment * 8) != 0) {
		FTL_ERRLOG(dev, "The number of blocks in band is not divisible by bitmap word bits\n");
		return -EINVAL;
	}

	/* One bit per block, hence the division by 8 for both the offset and the length */
	valid_map_bytes = blocks_in_band / 8;
	band->p2l_map.valid = ftl_bitmap_create(ftl_md_get_buffer(md_valid_map) +
						band->start_addr / 8, valid_map_bytes);
	if (band->p2l_map.valid == NULL) {
		return -ENOMEM;
	}

	band->md = &md_entries[band->id];

	/* Outside of fast startup no P2L map object is attached to the band yet */
	if (!ftl_fast_startup(dev)) {
		band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
	}

	return 0;
}
/*
 * Allocate the device's band array and reset the band lists.
 *
 * Every band gets its id and back-pointer to the device assigned and is queued
 * on dev->shut_bands. Returns 0 on success or -ENOMEM when the array cannot be
 * allocated.
 */
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	uint64_t count = ftl_get_num_bands(dev);
	uint64_t idx;

	TAILQ_INIT(&dev->free_bands);
	TAILQ_INIT(&dev->shut_bands);
	dev->num_free = 0;

	dev->bands = calloc(count, sizeof(*dev->bands));
	if (dev->bands == NULL) {
		return -ENOMEM;
	}

	for (idx = 0; idx < count; idx++) {
		struct ftl_band *cur = &dev->bands[idx];

		cur->id = idx;
		cur->dev = dev;

		/* Adding to shut_bands is necessary - see ftl_restore_band_close_cb() */
		TAILQ_INSERT_TAIL(&dev->shut_bands, cur, queue_entry);
	}

	return 0;
}
/*
 * Set up the metadata bindings of every band of the device.
 *
 * Calls ftl_band_init_md() for each band in index order and stops at the first
 * failure, logging the index of the offending band.
 *
 * Returns 0 when all bands were initialized, otherwise the error code returned
 * by ftl_band_init_md() for the band that failed.
 */
static int
ftl_dev_init_bands_md(struct spdk_ftl_dev *dev)
{
	uint64_t i;
	int rc = 0;

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		rc = ftl_band_init_md(&dev->bands[i]);
		if (rc) {
			/* i is a uint64_t, so print it with PRIu64 rather than %lu */
			FTL_ERRLOG(dev, "Failed to initialize metadata structures for band [%"PRIu64"]\n", i);
			break;
		}
	}

	return rc;
}
/*
 * Release the band array allocated by ftl_dev_init_bands().
 *
 * The pointer is cleared afterwards so that no dangling reference is left in the
 * device: ftl_dev_deinit_bands_md() checks dev->bands against NULL before walking
 * the array, and a repeated call of this function stays harmless.
 */
static void
ftl_dev_deinit_bands(struct spdk_ftl_dev *dev)
{
	free(dev->bands);
	dev->bands = NULL;
}
static void
ftl_dev_deinit_bands_md(struct spdk_ftl_dev *dev)
{
if (dev->bands) {
uint64_t i;
for (i = 0; i < dev->num_bands; ++i) {
struct ftl_band *band = &dev->bands[i];
ftl_bitmap_destroy(band->p2l_map.valid);
band->p2l_map.valid = NULL;
band->md = NULL;
}
}
}
/*
 * Management step: allocate and initialize the band array.
 * Fails the step when ftl_dev_init_bands() reports an error, otherwise
 * continues with the next step.
 */
void
ftl_mngt_init_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	int rc = ftl_dev_init_bands(dev);

	if (rc != 0) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ftl_mngt_next_step(mngt);
}
/*
 * Management step: bind all bands to their metadata.
 * Fails the step when ftl_dev_init_bands_md() reports an error, otherwise
 * continues with the next step.
 */
void
ftl_mngt_init_bands_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	int rc = ftl_dev_init_bands_md(dev);

	if (rc != 0) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ftl_mngt_next_step(mngt);
}
/*
 * Management step: free the band array via ftl_dev_deinit_bands().
 * This step cannot fail; it always advances to the next step.
 */
void
ftl_mngt_deinit_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
ftl_dev_deinit_bands(dev);
ftl_mngt_next_step(mngt);
}
/*
 * Management step: tear down the per-band metadata bindings via
 * ftl_dev_deinit_bands_md(). This step cannot fail; it always advances to the
 * next step.
 */
void
ftl_mngt_deinit_bands_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
ftl_dev_deinit_bands_md(dev);
ftl_mngt_next_step(mngt);
}
/*
 * Size of the unit used for grouping multiple logical bands (1GiB) together, so
 * that IOs look more sequential from the drive's perspective. Improves WAF.
 */
#define BASE_BDEV_RECLAIM_UNIT_SIZE (72 * GiB)

/*
 * Assign a physical band id to every logical band.
 *
 * Logical bands are grouped in runs of num_logical_in_phys consecutive bands
 * sharing one phys_id. The group size is 2 by default; for a base bdev with
 * more blocks than 1TiB worth of FTL blocks it is the number of logical bands
 * fitting into one reclaim unit. Trailing bands that do not fill a complete
 * group are removed from dev->shut_bands and no longer counted in
 * dev->num_bands. The group size is stored in dev->num_logical_bands_in_physical.
 */
static void
decorate_bands(struct spdk_ftl_dev *dev)
{
	uint64_t band_blocks = ftl_get_num_blocks_in_band(dev);
	uint64_t unit_blocks = BASE_BDEV_RECLAIM_UNIT_SIZE / FTL_BLOCK_SIZE;
	uint64_t bdev_blocks, total_bands, aligned_bands, idx;
	uint32_t group_size = 2;
	struct ftl_band *cur;

	assert(unit_blocks % band_blocks == 0);

	bdev_blocks = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));

	/* For base bdev bigger than 1TB take reclaim unit size for grouping GC bands */
	if (bdev_blocks > (TiB / FTL_BLOCK_SIZE)) {
		assert(unit_blocks < bdev_blocks);
		group_size = unit_blocks / band_blocks;
	}

	/* Snapshot the band count up front - it is decremented while dropping bands below */
	total_bands = ftl_get_num_bands(dev);
	aligned_bands = total_bands - total_bands % group_size;

	/* Each full group of consecutive logical bands shares one physical id */
	for (idx = 0; idx < aligned_bands; idx++) {
		cur = &dev->bands[idx];
		cur->phys_id = idx / group_size;
	}

	/* Mark not aligned logical bands as broken */
	for (; idx < total_bands; idx++) {
		cur = &dev->bands[idx];
		dev->num_bands--;
		TAILQ_REMOVE(&dev->shut_bands, cur, queue_entry);
	}

	dev->num_logical_bands_in_physical = group_size;
}
/*
 * Management step: assign physical band ids to the logical bands via
 * decorate_bands(). This step cannot fail; it always advances to the next step.
 */
void
ftl_mngt_decorate_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
decorate_bands(dev);
ftl_mngt_next_step(mngt);
}
/*
 * Management step: compute the addresses of every band.
 *
 * Band i starts at the current offset of the base data region plus
 * i * dev->num_blocks_in_band; the tail metadata address is then derived from the
 * band with ftl_band_tail_md_addr(). Always advances to the next step.
 */
void
ftl_mngt_initialize_band_address(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *data_md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_DATA_BASE];
	uint64_t idx = 0;

	while (idx < ftl_get_num_bands(dev)) {
		struct ftl_band *cur = &dev->bands[idx];

		cur->start_addr = data_md->region->current.offset + idx * dev->num_blocks_in_band;
		/* Needs start_addr to be set already, hence computed second */
		cur->tail_md_addr = ftl_band_tail_md_addr(cur);
		idx++;
	}

	ftl_mngt_next_step(mngt);
}
/*
 * Recover the sequence id counters after startup.
 *
 * Scans the bands queued on dev->shut_bands for the highest open (md->seq) and
 * close (md->close_seq_id) sequence ids, asks the NV cache for the highest chunk
 * open/close ids, and then:
 *  - sets the NV cache last_seq_id to the highest chunk close id,
 *  - sets both writers' last_seq_id to the highest band close id,
 *  - sets the superblock seq_id to the maximum of all four values.
 *
 * NOTE(review): the counters are size_t; if sequence ids are 64-bit this would
 * truncate on targets where size_t is narrower - confirm against the metadata
 * definitions.
 */
void
ftl_recover_max_seq(struct spdk_ftl_dev *dev)
{
	struct ftl_band *it;
	size_t bands_open_max = 0, bands_close_max = 0;
	size_t chunks_open_max = 0, chunks_close_max = 0;
	size_t bands_max, chunks_max;

	TAILQ_FOREACH(it, &dev->shut_bands, queue_entry) {
		bands_open_max = spdk_max(bands_open_max, it->md->seq);
		bands_close_max = spdk_max(bands_close_max, it->md->close_seq_id);
	}

	ftl_nv_cache_get_max_seq_id(&dev->nv_cache, &chunks_open_max, &chunks_close_max);

	dev->nv_cache.last_seq_id = chunks_close_max;
	dev->writer_gc.last_seq_id = bands_close_max;
	dev->writer_user.last_seq_id = bands_close_max;

	/* The superblock continues from the highest id seen anywhere */
	bands_max = spdk_max(bands_open_max, bands_close_max);
	chunks_max = spdk_max(chunks_open_max, chunks_close_max);
	dev->sb->seq_id = spdk_max(bands_max, chunks_max);
}
static int
_band_cmp(const void *_a, const void *_b)
{
struct ftl_band *a, *b;
a = *((struct ftl_band **)_a);
b = *((struct ftl_band **)_b);
return a->md->seq - b->md->seq;
}
/*
 * Find the band on dev->shut_bands with the fewest valid blocks.
 *
 * On a tie the band found first in the list wins. Returns NULL when the list
 * holds no band with fewer than UINT64_MAX valid blocks (e.g. when it is empty).
 */
static struct ftl_band *
next_high_prio_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *candidate, *best = NULL;
	uint64_t lowest = UINT64_MAX;

	TAILQ_FOREACH(candidate, &dev->shut_bands, queue_entry) {
		if (candidate->p2l_map.num_valid >= lowest) {
			continue;
		}

		best = candidate;
		lowest = candidate->p2l_map.num_valid;
	}

	return best;
}
/*
 * Prepare garbage collection state at the end of band initialization.
 *
 * Initializes the GC iterator and clears the high priority band id. When there
 * are no free bands at all, makes sure GC is able to make progress: either the
 * regular relocation candidate fits into the blocks still available to the GC
 * writer, or the shut band with the fewest valid blocks does - in which case
 * that band is recorded as the high priority GC band.
 *
 * Returns 0 on success, -1 when there are no free bands and no band can be
 * relocated into the remaining space.
 */
static int
finalize_init_gc(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	uint64_t writer_room;

	ftl_band_init_gc_iter(dev);
	dev->sb_shm->gc_info.band_id_high_prio = FTL_BAND_ID_INVALID;

	/* With free bands available nothing special is required */
	if (dev->num_free != 0) {
		return 0;
	}

	/* Number of blocks the GC writer is still able to take */
	writer_room = ftl_writer_get_free_blocks(&dev->writer_gc);

	/* Try the regular GC candidate first */
	band = ftl_band_search_next_to_reloc(dev);
	ftl_bug(NULL == band);
	if (band->p2l_map.num_valid <= writer_room) {
		/* This GC band can be moved */
		return 0;
	}

	/*
	 * The regular candidate does not fit into the remaining space, fall back to
	 * the band holding the least valid data.
	 */
	band = next_high_prio_band(dev);
	ftl_bug(NULL == band);
	if (band->p2l_map.num_valid > writer_room) {
		FTL_ERRLOG(dev, "CRITICAL ERROR, no more free bands and cannot start\n");
		return -1;
	}

	/* GC needs to start using this band */
	dev->sb_shm->gc_info.band_id_high_prio = band->id;

	return 0;
}
void
ftl_mngt_finalize_init_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
struct ftl_band *band, *temp_band, *open_bands[FTL_MAX_OPEN_BANDS];
struct ftl_writer *writer;
uint64_t i, num_open = 0, num_shut = 0;
uint64_t offset;
bool fast_startup = ftl_fast_startup(dev);
ftl_recover_max_seq(dev);
TAILQ_FOREACH_SAFE(band, &dev->free_bands, queue_entry, temp_band) {
band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
}
TAILQ_FOREACH_SAFE(band, &dev->shut_bands, queue_entry, temp_band) {
if (band->md->state == FTL_BAND_STATE_OPEN ||
band->md->state == FTL_BAND_STATE_FULL) {
TAILQ_REMOVE(&dev->shut_bands, band, queue_entry);
open_bands[num_open++] = band;
assert(num_open <= FTL_MAX_OPEN_BANDS);
continue;
}
if (dev->conf.mode & SPDK_FTL_MODE_CREATE) {
TAILQ_REMOVE(&dev->shut_bands, band, queue_entry);
assert(band->md->state == FTL_BAND_STATE_FREE);
band->md->state = FTL_BAND_STATE_CLOSED;
ftl_band_set_state(band, FTL_BAND_STATE_FREE);
} else {
num_shut++;
}
band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
}
/* Assign open bands to writers and alloc necessary resources */
qsort(open_bands, num_open, sizeof(open_bands[0]), _band_cmp);
for (i = 0; i < num_open; ++i) {
band = open_bands[i];
if (band->md->type == FTL_BAND_TYPE_COMPACTION) {
writer = &dev->writer_user;
} else if (band->md->type == FTL_BAND_TYPE_GC) {
writer = &dev->writer_gc;
} else {
assert(false);
}
if (band->md->state == FTL_BAND_STATE_FULL) {
TAILQ_INSERT_TAIL(&writer->full_bands, band, queue_entry);
} else {
if (writer->band == NULL) {
writer->band = band;
} else {
writer->next_band = band;
}
}
writer->num_bands++;
ftl_band_set_owner(band, ftl_writer_band_state_change, writer);
if (fast_startup) {
FTL_NOTICELOG(dev, "SHM: band open P2L map df_id 0x%"PRIx64"\n", band->md->df_p2l_map);
if (ftl_band_open_p2l_map(band)) {
ftl_mngt_fail_step(mngt);
return;
}
offset = band->md->iter.offset;
ftl_band_iter_init(band);
ftl_band_iter_set(band, offset);
ftl_mngt_p2l_ckpt_restore_shm_clean(band);
} else if (dev->sb->clean) {
band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
if (ftl_band_alloc_p2l_map(band)) {
ftl_mngt_fail_step(mngt);
return;
}
offset = band->md->iter.offset;
ftl_band_iter_init(band);
ftl_band_iter_set(band, offset);
if (ftl_mngt_p2l_ckpt_restore_clean(band)) {
ftl_mngt_fail_step(mngt);
return;
}
}
}
if (fast_startup) {
ftl_mempool_initialize_ext(dev->p2l_pool);
}
/* Recalculate number of free bands */
dev->num_free = 0;
TAILQ_FOREACH(band, &dev->free_bands, queue_entry) {
assert(band->md->state == FTL_BAND_STATE_FREE);
dev->num_free++;
}
ftl_apply_limits(dev);
if ((num_shut + num_open + dev->num_free) != ftl_get_num_bands(dev)) {
FTL_ERRLOG(dev, "ERROR, band list inconsistent state\n");
ftl_mngt_fail_step(mngt);
return;
}
if (finalize_init_gc(dev)) {
ftl_mngt_fail_step(mngt);
} else {
ftl_mngt_next_step(mngt);
}
}