spdk/lib/ftl/ftl_l2p_cache.c
FTL: Add initial L2P cache logic. The L2P cache allows for partial storing of the L2P in memory, paging in and out as necessary, lowering the total memory consumption.

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation.
* All rights reserved.
*/
#include "spdk/stdinc.h"
#include "spdk/cpuset.h"
#include "spdk/queue.h"
#include "spdk/thread.h"
#include "spdk/event.h"
#include "spdk/ftl.h"
#include "spdk/conf.h"
#include "spdk/env.h"
#include "spdk/util.h"
#include "ftl_core.h"
#include "ftl_l2p_cache.h"
#include "ftl_layout.h"
#include "ftl_nv_cache_io.h"
#include "mngt/ftl_mngt_steps.h"
#include "utils/ftl_defs.h"
#include "utils/ftl_addr_utils.h"
struct ftl_l2p_cache_page_io_ctx {
	struct ftl_l2p_cache *cache;
	uint64_t updates;
	struct spdk_bdev_io_wait_entry bdev_io_wait;
};

enum ftl_l2p_page_state {
	L2P_CACHE_PAGE_INIT,		/* Page in memory not initialized from disk page */
	L2P_CACHE_PAGE_READY,		/* Page initialized from disk */
	L2P_CACHE_PAGE_FLUSHING,	/* Page is being flushed to disk and removed from memory */
	L2P_CACHE_PAGE_PERSISTING,	/* Page is being flushed to disk and not removed from memory */
	L2P_CACHE_PAGE_CLEARING,	/* Page is being initialized with INVALID addresses */
	L2P_CACHE_PAGE_CORRUPTED	/* Page corrupted */
};
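
/* In-memory descriptor of a single cached L2P page: its state, position on
 * the LRU list, pin reference count, and the buffer holding the page data.
 */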
struct ftl_l2p_page {
	uint64_t updates;	/* Number of times an L2P entry was updated in the page since it was last persisted */
	TAILQ_HEAD(, ftl_l2p_page_wait_ctx) ppe_list;	/* for deferred pins */
	TAILQ_ENTRY(ftl_l2p_page) list_entry;
	uint64_t page_no;
	enum ftl_l2p_page_state state;
	uint64_t pin_ref_cnt;
	struct ftl_l2p_cache_page_io_ctx ctx;
	bool on_rank_list;
	void *page_buffer;
	ftl_df_obj_id obj_id;
};

struct ftl_l2p_page_set;
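
/* Tracks the pin progress of one page within a page set; queued on the
 * page's ppe_list when the pin has to wait for the page to be paged in.
 */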
struct ftl_l2p_page_wait_ctx {
	uint16_t pg_pin_issued;
	uint16_t pg_pin_completed;
	struct ftl_l2p_page_set *parent;
	uint64_t pg_no;
	TAILQ_ENTRY(ftl_l2p_page_wait_ctx) list_entry;
};

/* An L2P page contains 1024 4B entries (or 512 8B ones for big drives).
 * Currently, internal IO will only pin 1 LBA at a time, so only one entry should be needed.
 * User IO is split on internal xfer_size boundaries, which is currently set to 1 MiB (256 blocks),
 * so one entry should also be enough.
 * TODO: We should probably revisit this, though, when/if the xfer_size is based on the IO requirements
 * of the bottom device (e.g. RAID5F), since then big IOs (especially unaligned ones) could potentially
 * break this.
 */
#define L2P_MAX_PAGES_TO_PIN 4

struct ftl_l2p_page_set {
	uint16_t to_pin_cnt;
	uint16_t pinned_cnt;
	uint16_t pin_fault_cnt;
	uint8_t locked;
	uint8_t deferred;
	struct ftl_l2p_pin_ctx *pin_ctx;
	TAILQ_ENTRY(ftl_l2p_page_set) list_entry;
	struct ftl_l2p_page_wait_ctx entry[L2P_MAX_PAGES_TO_PIN];
};

struct ftl_l2p_l1_map_entry {
	ftl_df_obj_id page_obj_id;
};

enum ftl_l2p_cache_state {
	L2P_CACHE_INIT,
	L2P_CACHE_RUNNING,
	L2P_CACHE_IN_SHUTDOWN,
	L2P_CACHE_SHUTDOWN_DONE,
};
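
/* Context of a cache-wide management operation (e.g. clear): completion
 * callback, index of the page currently being processed, and the number of
 * outstanding IOs (queue depth).
 */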
struct ftl_l2p_cache_process_ctx {
	int status;
	ftl_l2p_cb cb;
	void *cb_ctx;
	uint64_t idx;
	uint64_t qd;
};
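
/* Top-level L2P cache state: the L2 mapping (one entry per L2P page, pointing
 * at the resident copy, if any), the metadata buffers and pools backing page
 * descriptors and page data, the LRU list used for eviction, and cached L2P
 * region layout info used when issuing page IO.
 */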
struct ftl_l2p_cache {
	struct spdk_ftl_dev *dev;
	struct ftl_l2p_l1_map_entry *l2_mapping;
	struct ftl_md *l2_md;
	struct ftl_md *l2_ctx_md;
	struct ftl_mempool *l2_ctx_pool;
	struct ftl_md *l1_md;

	TAILQ_HEAD(l2p_lru_list, ftl_l2p_page) lru_list;
	/* TODO: A lot of / and % operations are done on this value, consider adding a shift-based field and calculations instead */
	uint64_t lbas_in_page;
	uint64_t num_pages;	/* num pages to hold the entire L2P */
	uint64_t ios_in_flight;	/* Currently in-flight IOs, to determine L2P shutdown readiness */

	enum ftl_l2p_cache_state state;
	uint32_t l2_pgs_avail;
	uint32_t l2_pgs_evicting;
	uint32_t l2_pgs_resident_max;
	uint32_t evict_keep;
	struct ftl_mempool *page_pinners_pool;
	TAILQ_HEAD(, ftl_l2p_page_set) deferred_pinner_list;	/* for deferred pinners */
	/* This is a context for a management process */
	struct ftl_l2p_cache_process_ctx mctx;

	/* MD layout cache: Offset on a device in FTL_BLOCK_SIZE unit */
	uint64_t cache_layout_offset;

	/* MD layout cache: Device of region */
	struct spdk_bdev_desc *cache_layout_bdev_desc;

	/* MD layout cache: IO channel of region */
	struct spdk_io_channel *cache_layout_ioch;
};

typedef void (*ftl_l2p_cache_clear_cb)(struct ftl_l2p_cache *cache, int status, void *ctx_page);
typedef void (*ftl_l2p_cache_persist_cb)(struct ftl_l2p_cache *cache, int status, void *ctx_page);
typedef void (*ftl_l2p_cache_sync_cb)(struct spdk_ftl_dev *dev, int status, void *page,
				      void *user_ctx);
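
/* A cached L2P (L1) page is a single 4 KiB block */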
static inline uint64_t
ftl_l2p_cache_get_l1_page_size(void)
{
	return 1UL << 12;
}
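
/* Total memory footprint of one resident page: descriptor plus data buffer */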
static inline size_t
ftl_l2p_cache_get_page_all_size(void)
{
	return sizeof(struct ftl_l2p_page) + ftl_l2p_cache_get_l1_page_size();
}
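
/* Allocates the cache object and the L2 mapping (one entry per L2P page);
 * the addr_size parameter is currently unused here.
 */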
static void *
_ftl_l2p_cache_init(struct spdk_ftl_dev *dev, size_t addr_size, uint64_t l2p_size)
{
	struct ftl_l2p_cache *cache;
	uint64_t l2_pages = spdk_divide_round_up(l2p_size, ftl_l2p_cache_get_l1_page_size());
	size_t l2_size = l2_pages * sizeof(struct ftl_l2p_l1_map_entry);

	cache = calloc(1, sizeof(struct ftl_l2p_cache));
	if (cache == NULL) {
		return NULL;
	}
	cache->dev = dev;

	cache->l2_md = ftl_md_create(dev,
				     spdk_divide_round_up(l2_size, FTL_BLOCK_SIZE), 0,
				     FTL_L2P_CACHE_MD_NAME_L2,
				     ftl_md_create_shm_flags(dev), NULL);
	if (cache->l2_md == NULL) {
		goto fail_l2_md;
	}
	cache->l2_mapping = ftl_md_get_buffer(cache->l2_md);

	cache->lbas_in_page = dev->layout.l2p.lbas_in_page;
	cache->num_pages = l2_pages;

	return cache;
fail_l2_md:
	free(cache);
	return NULL;
}
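
/* Creates the pools and metadata regions sized to the configured L2P DRAM
 * limit, computes the eviction threshold (evict_keep), and caches the L2P
 * region's layout for later page IO.
 */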
int
ftl_l2p_cache_init(struct spdk_ftl_dev *dev)
{
	uint64_t l2p_size = dev->num_lbas * dev->layout.l2p.addr_size;
	struct ftl_l2p_cache *cache;
	const struct ftl_layout_region *reg;
	void *l2p = _ftl_l2p_cache_init(dev, dev->layout.l2p.addr_size, l2p_size);
	size_t page_pinners_pool_size = 1 << 15;
	size_t max_resident_size, max_resident_pgs;

	if (!l2p) {
		return -1;
	}
	dev->l2p = l2p;

	cache = (struct ftl_l2p_cache *)dev->l2p;
	cache->page_pinners_pool = ftl_mempool_create(page_pinners_pool_size,
				   sizeof(struct ftl_l2p_page_set),
				   64, SPDK_ENV_SOCKET_ID_ANY);
	if (!cache->page_pinners_pool) {
		return -1;
	}

	max_resident_size = dev->conf.l2p_dram_limit << 20;
	max_resident_pgs = max_resident_size / ftl_l2p_cache_get_page_all_size();

	if (max_resident_pgs > cache->num_pages) {
		SPDK_NOTICELOG("l2p memory limit higher than entire L2P size\n");
		max_resident_pgs = cache->num_pages;
	}

	/* Round down max res pgs to the nearest # of l2/l1 pgs */
	max_resident_size = max_resident_pgs * ftl_l2p_cache_get_page_all_size();
	SPDK_NOTICELOG("l2p maximum resident size is: %"PRIu64" (of %"PRIu64") MiB\n",
		       max_resident_size >> 20, dev->conf.l2p_dram_limit);

	TAILQ_INIT(&cache->deferred_pinner_list);
	TAILQ_INIT(&cache->lru_list);

	cache->l2_ctx_md = ftl_md_create(dev,
					 spdk_divide_round_up(max_resident_pgs * SPDK_ALIGN_CEIL(sizeof(struct ftl_l2p_page), 64),
							 FTL_BLOCK_SIZE), 0, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev), NULL);
	if (cache->l2_ctx_md == NULL) {
		return -1;
	}

	cache->l2_pgs_resident_max = max_resident_pgs;
	cache->l2_pgs_avail = max_resident_pgs;
	cache->l2_pgs_evicting = 0;
	cache->l2_ctx_pool = ftl_mempool_create_ext(ftl_md_get_buffer(cache->l2_ctx_md),
			     max_resident_pgs, sizeof(struct ftl_l2p_page), 64);
	if (cache->l2_ctx_pool == NULL) {
		return -1;
	}

#define FTL_L2P_CACHE_PAGE_AVAIL_MAX	(16UL << 10)
#define FTL_L2P_CACHE_PAGE_AVAIL_RATIO	5UL
	cache->evict_keep = spdk_divide_round_up(cache->num_pages * FTL_L2P_CACHE_PAGE_AVAIL_RATIO, 100);
	cache->evict_keep = spdk_min(FTL_L2P_CACHE_PAGE_AVAIL_MAX, cache->evict_keep);

	if (!ftl_fast_startup(dev) && !ftl_fast_recovery(dev)) {
		memset(cache->l2_mapping, (int)FTL_DF_OBJ_ID_INVALID, ftl_md_get_buffer_size(cache->l2_md));
		ftl_mempool_initialize_ext(cache->l2_ctx_pool);
	}

	cache->l1_md = ftl_md_create(dev,
				     max_resident_pgs, 0,
				     FTL_L2P_CACHE_MD_NAME_L1,
				     ftl_md_create_shm_flags(dev), NULL);
	if (cache->l1_md == NULL) {
		return -1;
	}

	/* Cache MD layout */
	reg = &dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	cache->cache_layout_offset = reg->current.offset;
	cache->cache_layout_bdev_desc = reg->bdev_desc;
	cache->cache_layout_ioch = reg->ioch;

	cache->state = L2P_CACHE_RUNNING;
	return 0;
}
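
/* Teardown helpers: destroy the metadata regions and mempools created at init */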
static void
ftl_l2p_cache_deinit_l2(struct spdk_ftl_dev *dev, struct ftl_l2p_cache *cache)
{
	ftl_md_destroy(cache->l2_ctx_md, ftl_md_destroy_shm_flags(dev));
	cache->l2_ctx_md = NULL;

	ftl_mempool_destroy_ext(cache->l2_ctx_pool);
	cache->l2_ctx_pool = NULL;

	ftl_md_destroy(cache->l1_md, ftl_md_destroy_shm_flags(dev));
	cache->l1_md = NULL;

	ftl_mempool_destroy(cache->page_pinners_pool);
	cache->page_pinners_pool = NULL;
}

static void
_ftl_l2p_cache_deinit(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	ftl_l2p_cache_deinit_l2(dev, cache);
	ftl_md_destroy(cache->l2_md, ftl_md_destroy_shm_flags(dev));
	free(cache);
}

void
ftl_l2p_cache_deinit(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	if (!cache) {
		return;
	}
	assert(cache->state == L2P_CACHE_SHUTDOWN_DONE || cache->state == L2P_CACHE_INIT);

	_ftl_l2p_cache_deinit(dev);
	dev->l2p = NULL;
}
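
/* Completion of ftl_md_clear() for the L2P region; unwraps the user callback
 * stashed in the md owner fields by ftl_l2p_cache_clear() below.
 */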
static void
clear_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	ftl_l2p_cb cb = md->owner.private;
	void *cb_ctx = md->owner.cb_ctx;

	cb(dev, status, cb_ctx);
}

void
ftl_l2p_cache_clear(struct spdk_ftl_dev *dev, ftl_l2p_cb cb, void *cb_ctx)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_L2P];
	ftl_addr invalid_addr = FTL_ADDR_INVALID;

	md->cb = clear_cb;
	md->owner.cb_ctx = cb_ctx;
	md->owner.private = cb;

	ftl_md_clear(md, invalid_addr, NULL);
}
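
/* Shutdown handling: halt moves the cache into L2P_CACHE_IN_SHUTDOWN and
 * completes once no IOs are in flight and no pages are being evicted;
 * is_halted reports whether that point has been reached.
 */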
bool
ftl_l2p_cache_is_halted(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	return cache->state == L2P_CACHE_SHUTDOWN_DONE;
}

void
ftl_l2p_cache_halt(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	if (cache->state != L2P_CACHE_SHUTDOWN_DONE) {
		cache->state = L2P_CACHE_IN_SHUTDOWN;
		if (!cache->ios_in_flight && !cache->l2_pgs_evicting) {
			cache->state = L2P_CACHE_SHUTDOWN_DONE;
		}
	}
}
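
/* Per-iteration processing hook; in this initial version it only verifies
 * that the cache is running, with no further background work.
 */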
void
ftl_l2p_cache_process(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = dev->l2p;

	if (spdk_unlikely(cache->state != L2P_CACHE_RUNNING)) {
		return;
	}
}