The L2P cache allows only part of the L2P table to be kept in memory, paging entries in and out as necessary, which lowers total memory consumption.

Signed-off-by: Kozlowski Mateusz <mateusz.kozlowski@intel.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: I727fec9d2f0ade4ca73e872d62a2ec10cfdb0a88
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13353
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
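To illustrate the idea (a minimal sketch, not code from this patch): a logical block address selects an L2P page and an offset within it, and only pages that are actually referenced need to be resident in DRAM. The helper name below is hypothetical; lbas_in_page mirrors the field of the same name in struct ftl_l2p_cache in the file below.

/* Hypothetical sketch of the address split the cache relies on. */
static inline void
l2p_locate(uint64_t lba, uint64_t lbas_in_page, uint64_t *page_no, uint64_t *offset)
{
	*page_no = lba / lbas_in_page;	/* which L2P page holds the entry */
	*offset = lba % lbas_in_page;	/* entry index within that page */
}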
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/cpuset.h"
#include "spdk/queue.h"
#include "spdk/thread.h"
#include "spdk/event.h"
#include "spdk/ftl.h"
#include "spdk/conf.h"
#include "spdk/env.h"
#include "spdk/util.h"

#include "ftl_core.h"
#include "ftl_l2p_cache.h"
#include "ftl_layout.h"
#include "ftl_nv_cache_io.h"
#include "mngt/ftl_mngt_steps.h"
#include "utils/ftl_defs.h"
#include "utils/ftl_addr_utils.h"

struct ftl_l2p_cache_page_io_ctx {
	struct ftl_l2p_cache *cache;
	uint64_t updates;
	struct spdk_bdev_io_wait_entry bdev_io_wait;
};

enum ftl_l2p_page_state {
	L2P_CACHE_PAGE_INIT,		/* Page in memory not initialized from disk page */
	L2P_CACHE_PAGE_READY,		/* Page initialized from disk */
	L2P_CACHE_PAGE_FLUSHING,	/* Page is being flushed to disk and removed from memory */
	L2P_CACHE_PAGE_PERSISTING,	/* Page is being flushed to disk and not removed from memory */
	L2P_CACHE_PAGE_CLEARING,	/* Page is being initialized with INVALID addresses */
	L2P_CACHE_PAGE_CORRUPTED	/* Page corrupted */
};

struct ftl_l2p_page {
	uint64_t updates; /* Number of times an L2P entry was updated in the page since it was last persisted */
	TAILQ_HEAD(, ftl_l2p_page_wait_ctx) ppe_list; /* for deferred pins */
	TAILQ_ENTRY(ftl_l2p_page) list_entry;
	uint64_t page_no;
	enum ftl_l2p_page_state state;
	uint64_t pin_ref_cnt;
	struct ftl_l2p_cache_page_io_ctx ctx;
	bool on_rank_list;
	void *page_buffer;
	ftl_df_obj_id obj_id;
};

struct ftl_l2p_page_set;

struct ftl_l2p_page_wait_ctx {
	uint16_t pg_pin_issued;
	uint16_t pg_pin_completed;
	struct ftl_l2p_page_set *parent;
	uint64_t pg_no;
	TAILQ_ENTRY(ftl_l2p_page_wait_ctx) list_entry;
};

/* An L2P page contains 1024 4B entries (or 512 8B ones for big drives).
 * Currently, internal IO will only pin 1 LBA at a time, so only one entry should be needed.
 * User IO is split on internal xfer_size boundaries, which is currently set to 1 MiB (256 blocks),
 * so one entry should also be enough.
 * TODO: We should probably revisit this when/if the xfer_size is based on the IO requirements of the
 * bottom device (e.g. RAID5F), since then big IOs (especially unaligned ones) could potentially break this.
 */
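/* Illustrative arithmetic (not part of the original change): with 4 KiB L2P
 * pages and 4 B entries there are 4096 / 4 = 1024 entries per page. User IO
 * is split on 1 MiB boundaries, i.e. into chunks of at most 256 blocks that
 * sit inside a single 1 MiB-aligned window; since 256 divides 1024, such a
 * window never crosses an L2P page boundary, so a single pinned page per
 * chunk is enough today and the limit of 4 below is headroom.
 */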
#define L2P_MAX_PAGES_TO_PIN 4
struct ftl_l2p_page_set {
	uint16_t to_pin_cnt;
	uint16_t pinned_cnt;
	uint16_t pin_fault_cnt;
	uint8_t locked;
	uint8_t deferred;
	struct ftl_l2p_pin_ctx *pin_ctx;
	TAILQ_ENTRY(ftl_l2p_page_set) list_entry;
	struct ftl_l2p_page_wait_ctx entry[L2P_MAX_PAGES_TO_PIN];
};

struct ftl_l2p_l1_map_entry {
	ftl_df_obj_id page_obj_id;
};

enum ftl_l2p_cache_state {
	L2P_CACHE_INIT,
	L2P_CACHE_RUNNING,
	L2P_CACHE_IN_SHUTDOWN,
	L2P_CACHE_SHUTDOWN_DONE,
};

struct ftl_l2p_cache_process_ctx {
	int status;
	ftl_l2p_cb cb;
	void *cb_ctx;
	uint64_t idx;
	uint64_t qd;
};
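
/* Overview (illustrative summary, inferred from the fields below): the cache
 * is a two-level structure. l2_mapping holds one ftl_l2p_l1_map_entry per L2P
 * page and records the df object id of the page's resident context, or
 * FTL_DF_OBJ_ID_INVALID when the page is not in memory. Resident page
 * contexts come from l2_ctx_pool (backed by l2_ctx_md) and their data buffers
 * from l1_md; resident pages are kept on lru_list for eviction.
 */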
struct ftl_l2p_cache {
	struct spdk_ftl_dev *dev;
	struct ftl_l2p_l1_map_entry *l2_mapping;
	struct ftl_md *l2_md;
	struct ftl_md *l2_ctx_md;
	struct ftl_mempool *l2_ctx_pool;
	struct ftl_md *l1_md;

	TAILQ_HEAD(l2p_lru_list, ftl_l2p_page) lru_list;
	/* TODO: A lot of / and % operations are done on this value, consider adding a shift-based field and calculations instead */
	uint64_t lbas_in_page;
	uint64_t num_pages; /* num pages to hold the entire L2P */

	uint64_t ios_in_flight; /* Currently in flight IOs, to determine l2p shutdown readiness */
	enum ftl_l2p_cache_state state;
	uint32_t l2_pgs_avail;
	uint32_t l2_pgs_evicting;
	uint32_t l2_pgs_resident_max;
	uint32_t evict_keep;
	struct ftl_mempool *page_pinners_pool;
	TAILQ_HEAD(, ftl_l2p_page_set) deferred_pinner_list; /* for deferred pinners */

	/* This is a context for a management process */
	struct ftl_l2p_cache_process_ctx mctx;

	/* MD layout cache: Offset on a device in FTL_BLOCK_SIZE unit */
	uint64_t cache_layout_offset;

	/* MD layout cache: Device of region */
	struct spdk_bdev_desc *cache_layout_bdev_desc;

	/* MD layout cache: IO channel of region */
	struct spdk_io_channel *cache_layout_ioch;
};

typedef void (*ftl_l2p_cache_clear_cb)(struct ftl_l2p_cache *cache, int status, void *ctx_page);
typedef void (*ftl_l2p_cache_persist_cb)(struct ftl_l2p_cache *cache, int status, void *ctx_page);
typedef void (*ftl_l2p_cache_sync_cb)(struct spdk_ftl_dev *dev, int status, void *page,
				      void *user_ctx);

static inline uint64_t
ftl_l2p_cache_get_l1_page_size(void)
{
	return 1UL << 12;
}

static inline size_t
ftl_l2p_cache_get_page_all_size(void)
{
	return sizeof(struct ftl_l2p_page) + ftl_l2p_cache_get_l1_page_size();
}

static void *
_ftl_l2p_cache_init(struct spdk_ftl_dev *dev, size_t addr_size, uint64_t l2p_size)
{
	struct ftl_l2p_cache *cache;
	uint64_t l2_pages = spdk_divide_round_up(l2p_size, ftl_l2p_cache_get_l1_page_size());
	size_t l2_size = l2_pages * sizeof(struct ftl_l2p_l1_map_entry);

	cache = calloc(1, sizeof(struct ftl_l2p_cache));
	if (cache == NULL) {
		return NULL;
	}
	cache->dev = dev;

	cache->l2_md = ftl_md_create(dev,
				     spdk_divide_round_up(l2_size, FTL_BLOCK_SIZE), 0,
				     FTL_L2P_CACHE_MD_NAME_L2,
				     ftl_md_create_shm_flags(dev), NULL);

	if (cache->l2_md == NULL) {
		goto fail_l2_md;
	}
	cache->l2_mapping = ftl_md_get_buffer(cache->l2_md);

	cache->lbas_in_page = dev->layout.l2p.lbas_in_page;
	cache->num_pages = l2_pages;

	return cache;
fail_l2_md:
	free(cache);
	return NULL;
}

int
ftl_l2p_cache_init(struct spdk_ftl_dev *dev)
{
	uint64_t l2p_size = dev->num_lbas * dev->layout.l2p.addr_size;
	struct ftl_l2p_cache *cache;
	const struct ftl_layout_region *reg;
	void *l2p = _ftl_l2p_cache_init(dev, dev->layout.l2p.addr_size, l2p_size);
	size_t page_pinners_pool_size = 1 << 15;
	size_t max_resident_size, max_resident_pgs;

	if (!l2p) {
		return -1;
	}
	dev->l2p = l2p;

	cache = (struct ftl_l2p_cache *)dev->l2p;
	cache->page_pinners_pool = ftl_mempool_create(page_pinners_pool_size,
				   sizeof(struct ftl_l2p_page_set),
				   64, SPDK_ENV_SOCKET_ID_ANY);
	if (!cache->page_pinners_pool) {
		return -1;
	}

	max_resident_size = dev->conf.l2p_dram_limit << 20;
	max_resident_pgs = max_resident_size / ftl_l2p_cache_get_page_all_size();
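
	/* Illustrative numbers (hypothetical, not taken from this change): each
	 * resident page costs ftl_l2p_cache_get_page_all_size() bytes, i.e. the
	 * 4 KiB L1 page plus the struct ftl_l2p_page header, so an
	 * l2p_dram_limit of 2048 MiB allows roughly 500k resident pages; the
	 * check below caps this at cache->num_pages when the whole L2P fits.
	 */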
	if (max_resident_pgs > cache->num_pages) {
		SPDK_NOTICELOG("l2p memory limit higher than entire L2P size\n");
		max_resident_pgs = cache->num_pages;
	}

	/* Round down max res pgs to the nearest # of l2/l1 pgs */
	max_resident_size = max_resident_pgs * ftl_l2p_cache_get_page_all_size();
	SPDK_NOTICELOG("l2p maximum resident size is: %"PRIu64" (of %"PRIu64") MiB\n",
		       max_resident_size >> 20, dev->conf.l2p_dram_limit);

	TAILQ_INIT(&cache->deferred_pinner_list);
	TAILQ_INIT(&cache->lru_list);

	cache->l2_ctx_md = ftl_md_create(dev,
					 spdk_divide_round_up(max_resident_pgs * SPDK_ALIGN_CEIL(sizeof(struct ftl_l2p_page), 64),
							 FTL_BLOCK_SIZE), 0, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev), NULL);

	if (cache->l2_ctx_md == NULL) {
		return -1;
	}

	cache->l2_pgs_resident_max = max_resident_pgs;
	cache->l2_pgs_avail = max_resident_pgs;
	cache->l2_pgs_evicting = 0;
	cache->l2_ctx_pool = ftl_mempool_create_ext(ftl_md_get_buffer(cache->l2_ctx_md),
			     max_resident_pgs, sizeof(struct ftl_l2p_page), 64);

	if (cache->l2_ctx_pool == NULL) {
		return -1;
	}

#define FTL_L2P_CACHE_PAGE_AVAIL_MAX 16UL << 10
#define FTL_L2P_CACHE_PAGE_AVAIL_RATIO 5UL
	cache->evict_keep = spdk_divide_round_up(cache->num_pages * FTL_L2P_CACHE_PAGE_AVAIL_RATIO, 100);
	cache->evict_keep = spdk_min(FTL_L2P_CACHE_PAGE_AVAIL_MAX, cache->evict_keep);
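	/* Illustrative numbers (hypothetical): for a 1 TiB drive with 4 KiB
	 * blocks and 4 B L2P entries, num_pages is around 256k, so the 5%
	 * ratio above yields an evict_keep of roughly 13k pages, while the
	 * 16k (16UL << 10) cap bounds it for very large drives.
	 */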

	if (!ftl_fast_startup(dev) && !ftl_fast_recovery(dev)) {
		memset(cache->l2_mapping, (int)FTL_DF_OBJ_ID_INVALID, ftl_md_get_buffer_size(cache->l2_md));
		ftl_mempool_initialize_ext(cache->l2_ctx_pool);
	}

	cache->l1_md = ftl_md_create(dev,
				     max_resident_pgs, 0,
				     FTL_L2P_CACHE_MD_NAME_L1,
				     ftl_md_create_shm_flags(dev), NULL);

	if (cache->l1_md == NULL) {
		return -1;
	}

	/* Cache MD layout */
	reg = &dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	cache->cache_layout_offset = reg->current.offset;
	cache->cache_layout_bdev_desc = reg->bdev_desc;
	cache->cache_layout_ioch = reg->ioch;

	cache->state = L2P_CACHE_RUNNING;
	return 0;
}

static void
ftl_l2p_cache_deinit_l2(struct spdk_ftl_dev *dev, struct ftl_l2p_cache *cache)
{
	ftl_md_destroy(cache->l2_ctx_md, ftl_md_destroy_shm_flags(dev));
	cache->l2_ctx_md = NULL;

	ftl_mempool_destroy_ext(cache->l2_ctx_pool);
	cache->l2_ctx_pool = NULL;

	ftl_md_destroy(cache->l1_md, ftl_md_destroy_shm_flags(dev));
	cache->l1_md = NULL;

	ftl_mempool_destroy(cache->page_pinners_pool);
	cache->page_pinners_pool = NULL;
}

static void
_ftl_l2p_cache_deinit(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	ftl_l2p_cache_deinit_l2(dev, cache);
	ftl_md_destroy(cache->l2_md, ftl_md_destroy_shm_flags(dev));
	free(cache);
}

void
ftl_l2p_cache_deinit(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	if (!cache) {
		return;
	}
	assert(cache->state == L2P_CACHE_SHUTDOWN_DONE || cache->state == L2P_CACHE_INIT);

	_ftl_l2p_cache_deinit(dev);
	dev->l2p = 0;
}
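
/* Note on the clear flow (summary of the code below): ftl_l2p_cache_clear
 * stashes the caller's callback and context in the L2P metadata region's
 * owner fields and asks ftl_md_clear to fill the region with
 * FTL_ADDR_INVALID; clear_cb then unpacks those fields and invokes the
 * user callback once the metadata operation completes.
 */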
static void
clear_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	ftl_l2p_cb cb = md->owner.private;
	void *cb_cntx = md->owner.cb_ctx;

	cb(dev, status, cb_cntx);
}

void
ftl_l2p_cache_clear(struct spdk_ftl_dev *dev, ftl_l2p_cb cb, void *cb_ctx)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_L2P];
	ftl_addr invalid_addr = FTL_ADDR_INVALID;

	md->cb = clear_cb;
	md->owner.cb_ctx = cb_ctx;
	md->owner.private = cb;

	ftl_md_clear(md, invalid_addr, NULL);
}

bool
ftl_l2p_cache_is_halted(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	return cache->state == L2P_CACHE_SHUTDOWN_DONE;
}
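
/* Shutdown sequencing (summary of the code below): ftl_l2p_cache_halt moves
 * the cache to L2P_CACHE_IN_SHUTDOWN and only marks it
 * L2P_CACHE_SHUTDOWN_DONE once no IOs are in flight and no pages are being
 * evicted; ftl_l2p_cache_is_halted above reports whether that final state
 * has been reached.
 */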
void
ftl_l2p_cache_halt(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = (struct ftl_l2p_cache *)dev->l2p;

	if (cache->state != L2P_CACHE_SHUTDOWN_DONE) {
		cache->state = L2P_CACHE_IN_SHUTDOWN;
		if (!cache->ios_in_flight && !cache->l2_pgs_evicting) {
			cache->state = L2P_CACHE_SHUTDOWN_DONE;
		}
	}
}

void
ftl_l2p_cache_process(struct spdk_ftl_dev *dev)
{
	struct ftl_l2p_cache *cache = dev->l2p;

	if (spdk_unlikely(cache->state != L2P_CACHE_RUNNING)) {
		return;
	}
}