From 7a7ac2af3342c6df0dde897b78a2026da3c99f94 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Mon, 20 Jun 2022 12:31:56 +0200 Subject: [PATCH] ftl: metadata utils and initialization Signed-off-by: Artur Paszkiewicz Signed-off-by: Kozlowski Mateusz Change-Id: Iaa9d7dd3f9e3147f0acfe18e23506a33fe3fd5a3 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13293 Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Jim Harris Reviewed-by: Konrad Sztyber Tested-by: SPDK CI Jenkins --- lib/ftl/Makefile | 2 +- lib/ftl/ftl_layout.h | 3 + lib/ftl/ftl_utils.h | 1 + lib/ftl/mngt/ftl_mngt_bdev.c | 6 + lib/ftl/mngt/ftl_mngt_md.c | 58 +++ lib/ftl/mngt/ftl_mngt_startup.c | 5 + lib/ftl/mngt/ftl_mngt_steps.h | 4 + lib/ftl/utils/ftl_md.c | 855 ++++++++++++++++++++++++++++++++ lib/ftl/utils/ftl_md.h | 271 ++++++++++ 9 files changed, 1204 insertions(+), 1 deletion(-) create mode 100644 lib/ftl/utils/ftl_md.c create mode 100644 lib/ftl/utils/ftl_md.h diff --git a/lib/ftl/Makefile b/lib/ftl/Makefile index 478b5f561..3f3274c33 100644 --- a/lib/ftl/Makefile +++ b/lib/ftl/Makefile @@ -20,7 +20,7 @@ FTL_SUBDIRS := mngt utils C_SRCS = ftl_core.c ftl_init.c ftl_layout.c C_SRCS += mngt/ftl_mngt.c mngt/ftl_mngt_bdev.c mngt/ftl_mngt_shutdown.c mngt/ftl_mngt_startup.c C_SRCS += mngt/ftl_mngt_md.c -C_SRCS += utils/ftl_conf.c +C_SRCS += utils/ftl_conf.c utils/ftl_md.c SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_ftl.map) diff --git a/lib/ftl/ftl_layout.h b/lib/ftl/ftl_layout.h index 9e14ff0af..bba81add3 100644 --- a/lib/ftl/ftl_layout.h +++ b/lib/ftl/ftl_layout.h @@ -101,6 +101,9 @@ struct ftl_layout { } l2p; struct ftl_layout_region region[FTL_LAYOUT_REGION_TYPE_MAX]; + + /* Metadata object corresponding to the regions */ + struct ftl_md *md[FTL_LAYOUT_REGION_TYPE_MAX]; }; /** diff --git a/lib/ftl/ftl_utils.h b/lib/ftl/ftl_utils.h index d7fe42299..c00c2b879 100644 --- a/lib/ftl/ftl_utils.h +++ b/lib/ftl/ftl_utils.h @@ -8,5 +8,6 @@ #include "utils/ftl_defs.h" #include 
"utils/ftl_conf.h"
+#include "utils/ftl_md.h"
 
 #endif /* FTL_FTL_UTILS_H */
diff --git a/lib/ftl/mngt/ftl_mngt_bdev.c b/lib/ftl/mngt/ftl_mngt_bdev.c
index b209773c6..b9ff48b44 100644
--- a/lib/ftl/mngt/ftl_mngt_bdev.c
+++ b/lib/ftl/mngt/ftl_mngt_bdev.c
@@ -11,6 +11,7 @@
 #include "ftl_internal.h"
 #include "ftl_core.h"
 #include "utils/ftl_defs.h"
+#include "utils/ftl_md.h"
 
 #define MINIMUM_CACHE_SIZE_GIB 5
 #define MINIMUM_BASE_SIZE_GIB 20
@@ -210,6 +211,11 @@ ftl_mngt_open_cache_bdev(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt
 	}
 
 	dev->cache_md_size = spdk_bdev_get_md_size(bdev);
+	if (dev->cache_md_size != sizeof(union ftl_md_vss)) {
+		FTL_ERRLOG(dev, "Bdev's %s metadata is invalid size (%"PRIu32")\n",
+			   spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
+		goto error;
+	}
 
 	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
 		FTL_ERRLOG(dev, "Unsupported DIF type used by bdev %s\n",
diff --git a/lib/ftl/mngt/ftl_mngt_md.c b/lib/ftl/mngt/ftl_mngt_md.c
index e2697a318..15162adb3 100644
--- a/lib/ftl/mngt/ftl_mngt_md.c
+++ b/lib/ftl/mngt/ftl_mngt_md.c
@@ -21,3 +21,61 @@ ftl_mngt_init_layout(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
 		ftl_mngt_next_step(mngt);
 	}
 }
+
+/*
+ * Tells whether the metadata object of the given region type gets a data
+ * buffer. The two data regions (NV cache and base device) do not; every other
+ * region type does.
+ */
+static bool
+is_buffer_needed(enum ftl_layout_region_type type)
+{
+	switch (type) {
+	case FTL_LAYOUT_REGION_TYPE_DATA_NVC:
+	case FTL_LAYOUT_REGION_TYPE_DATA_BASE:
+		return false;
+
+	default:
+		return true;
+	}
+}
+
+/*
+ * Management step: creates a metadata object for every layout region that
+ * does not have one yet. Fails the step on the first creation failure; the
+ * objects created so far are left in layout->md[] for the cleanup step.
+ */
+void
+ftl_mngt_init_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
+{
+	struct ftl_layout *layout = &dev->layout;
+	struct ftl_layout_region *region = layout->region;
+	uint64_t i;
+
+	for (i = 0; i < FTL_LAYOUT_REGION_TYPE_MAX; i++, region++) {
+		if (layout->md[i]) {
+			/*
+			 * Some metadata objects are initialized by other FTL
+			 * components. At the moment it's only used by superblock (and its mirror) -
+			 * during load time we need to read it earlier in order to get the layout for the
+			 * other regions.
+			 */
+			continue;
+		}
+		layout->md[i] = ftl_md_create(dev, region->current.blocks, region->vss_blksz, region->name,
+					      !is_buffer_needed(i), region);
+		if (NULL == layout->md[i]) {
+			ftl_mngt_fail_step(mngt);
+			return;
+		}
+	}
+
+	ftl_mngt_next_step(mngt);
+}
+
+/*
+ * Management step (cleanup counterpart of ftl_mngt_init_md): destroys every
+ * metadata object present in layout->md[] and clears its slot, so it also
+ * works on a partially initialized layout.
+ */
+void
+ftl_mngt_deinit_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
+{
+	struct ftl_layout *layout = &dev->layout;
+	uint64_t i;
+
+	for (i = 0; i < FTL_LAYOUT_REGION_TYPE_MAX; i++) {
+		if (layout->md[i]) {
+			ftl_md_destroy(layout->md[i]);
+			layout->md[i] = NULL;
+		}
+	}
+
+	ftl_mngt_next_step(mngt);
+}
diff --git a/lib/ftl/mngt/ftl_mngt_startup.c b/lib/ftl/mngt/ftl_mngt_startup.c
index 8c1119d54..4a281b848 100644
--- a/lib/ftl/mngt/ftl_mngt_startup.c
+++ b/lib/ftl/mngt/ftl_mngt_startup.c
@@ -26,6 +26,11 @@ static const struct ftl_mngt_process_desc desc_startup = {
 			.name = "Initialize layout",
 			.action = ftl_mngt_init_layout
 		},
+		{
+			.name = "Initialize metadata",
+			.action = ftl_mngt_init_md,
+			.cleanup = ftl_mngt_deinit_md
+		},
 		{}
 	}
 };
diff --git a/lib/ftl/mngt/ftl_mngt_steps.h b/lib/ftl/mngt/ftl_mngt_steps.h
index eade7e270..3651807dd 100644
--- a/lib/ftl/mngt/ftl_mngt_steps.h
+++ b/lib/ftl/mngt/ftl_mngt_steps.h
@@ -18,6 +18,10 @@ void ftl_mngt_close_cache_bdev(struct spdk_ftl_dev *dev, struct ftl_mngt_process
 
 void ftl_mngt_init_layout(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt);
 
+void ftl_mngt_init_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt);
+
+void ftl_mngt_deinit_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt);
+
 void ftl_mngt_rollback_device(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt);
 
 #endif /* FTL_MNGT_STEPS_H */
diff --git a/lib/ftl/utils/ftl_md.c b/lib/ftl/utils/ftl_md.c
new file mode 100644
index 000000000..5f830db99
--- /dev/null
+++ b/lib/ftl/utils/ftl_md.c
@@ -0,0 +1,855 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ */
+
+#include "spdk/env.h"
+#include "spdk/bdev_module.h"
+
+#include "ftl_core.h"
+#include "ftl_md.h"
+#include "ftl_utils.h"
+
+struct ftl_md;
+static void io_submit(struct ftl_md *md);
+static void io_done(struct ftl_md *md);
+
+/*
+ * Returns true when the object has a region whose mirror type is valid and
+ * mirroring has not been disabled through md->mirror_enabled.
+ */
+static bool
+has_mirror(struct ftl_md *md)
+{
+	if (md->region) {
+		if (md->region->mirror_type != FTL_LAYOUT_REGION_TYPE_INVALID) {
+			return md->mirror_enabled;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Allocates the mirror object on first use and (re)binds it to the primary:
+ * the mirror shares the primary's device, data and VSS buffers and points at
+ * the layout region named by the primary region's mirror_type.
+ *
+ * Returns 0 on success or -ENOMEM when the mirror cannot be allocated.
+ */
+static int
+setup_mirror(struct ftl_md *md)
+{
+	if (!md->mirror) {
+		md->mirror = calloc(1, sizeof(*md->mirror));
+		if (!md->mirror) {
+			return -ENOMEM;
+		}
+		md->mirror_enabled = true;
+	}
+
+	md->mirror->dev = md->dev;
+	md->mirror->data_blocks = md->data_blocks;
+	md->mirror->data = md->data;
+	md->mirror->vss_data = md->vss_data;
+
+	/* Set proper region in secondary object */
+	assert(md->region->mirror_type != FTL_LAYOUT_REGION_TYPE_INVALID);
+	md->mirror->region = &md->dev->layout.region[md->region->mirror_type];
+
+	return 0;
+}
+
+/* Number of blocks moved by a single metadata IO: four device transfer units. */
+uint64_t
+ftl_md_xfer_blocks(struct spdk_ftl_dev *dev)
+{
+	return 4ULL * dev->xfer_size;
+}
+
+/* Size in bytes of the bounce buffer used for a single metadata IO. */
+static uint64_t
+xfer_size(struct ftl_md *md)
+{
+	return ftl_md_xfer_blocks(md->dev) * FTL_BLOCK_SIZE;
+}
+
+/*
+ * Creates a metadata object describing 'blocks' FTL blocks.
+ *
+ * Unless no_mem is set, a zeroed buffer holding the data followed by the VSS
+ * area (vss_blksz bytes per block) is allocated. When a region is given, the
+ * per-entry VSS DMA buffer is allocated (if its size is non-zero) and the
+ * region is attached with ftl_md_set_region().
+ *
+ * 'name' is currently not used by this function.
+ *
+ * Returns the new object or NULL on any allocation/setup failure.
+ */
+struct ftl_md *ftl_md_create(struct spdk_ftl_dev *dev, uint64_t blocks,
+			     uint64_t vss_blksz, const char *name, bool no_mem,
+			     const struct ftl_layout_region *region)
+{
+	struct ftl_md *md;
+
+	md = calloc(1, sizeof(*md));
+	if (!md) {
+		return NULL;
+	}
+	md->dev = dev;
+	md->data_blocks = blocks;
+	md->mirror_enabled = true;
+
+	if (!no_mem) {
+		size_t buf_size = md->data_blocks * (FTL_BLOCK_SIZE + vss_blksz);
+		int ret;
+
+		ret = posix_memalign((void **)&md->data, FTL_BLOCK_SIZE, buf_size);
+		if (ret) {
+			free(md);
+			return NULL;
+		}
+		memset(md->data, 0, buf_size);
+
+		if (vss_blksz) {
+			/* The VSS area lives right behind the data blocks */
+			md->vss_data = ((char *)md->data) + md->data_blocks * FTL_BLOCK_SIZE;
+		}
+	}
+
+	if (region) {
+		size_t entry_vss_buf_size = vss_blksz * region->entry_size;
+
+		if (entry_vss_buf_size) {
+			md->entry_vss_dma_buf = spdk_malloc(entry_vss_buf_size, FTL_BLOCK_SIZE,
+							    NULL, SPDK_ENV_LCORE_ID_ANY,
+							    SPDK_MALLOC_DMA);
+			if (!md->entry_vss_dma_buf) {
+				goto err;
+			}
+		}
+
+		if (ftl_md_set_region(md, region)) {
+			goto err;
+		}
+	}
+
+	return md;
+err:
+	ftl_md_destroy(md);
+	return NULL;
+}
+
+/*
+ * Releases the data buffer, the per-entry VSS DMA buffer, the mirror object
+ * and the object itself. NULL is accepted. The mirror is released with a plain
+ * free() because it only borrows the primary's buffers.
+ */
+void
+ftl_md_destroy(struct ftl_md *md)
+{
+	if (!md) {
+		return;
+	}
+
+	ftl_md_free_buf(md);
+
+	spdk_free(md->entry_vss_dma_buf);
+
+	free(md->mirror);
+	free(md);
+}
+
+/* Frees the data buffer (which also backs the VSS area) and clears both pointers. */
+void
+ftl_md_free_buf(struct ftl_md *md)
+{
+	if (!md) {
+		return;
+	}
+
+	if (md->data) {
+		free(md->data);
+		md->data = NULL;
+		md->vss_data = NULL;
+	}
+}
+
+void *
+ftl_md_get_buffer(struct ftl_md *md)
+{
+	return md->data;
+}
+
+/* Size of the data part of the buffer, in bytes. */
+uint64_t
+ftl_md_get_buffer_size(struct ftl_md *md)
+{
+	return md->data_blocks * FTL_BLOCK_SIZE;
+}
+
+/* Copies *vss_pattern into each of the 'count' entries of buf. */
+static void
+ftl_md_vss_buf_init(union ftl_md_vss *buf, uint32_t count,
+		    const union ftl_md_vss *vss_pattern)
+{
+	while (count) {
+		count--;
+		buf[count] = *vss_pattern;
+	}
+}
+
+/*
+ * Allocates a DMA buffer of 'count' VSS entries, each zeroed except for the
+ * version field which is taken from the region. Returns NULL on allocation
+ * failure. The buffer comes from spdk_zmalloc().
+ */
+union ftl_md_vss *ftl_md_vss_buf_alloc(struct ftl_layout_region *region, uint32_t count)
+{
+	/* Widen before multiplying so a large count cannot wrap in 32 bits */
+	union ftl_md_vss *buf = spdk_zmalloc((size_t)count * FTL_MD_VSS_SZ, FTL_BLOCK_SIZE, NULL,
+					     SPDK_ENV_LCORE_ID_ANY,
+					     SPDK_MALLOC_DMA);
+
+	if (!buf) {
+		return NULL;
+	}
+
+	union ftl_md_vss vss_buf = {0};
+	vss_buf.version.md_version = region->current.version;
+	ftl_md_vss_buf_init(buf, count, &vss_buf);
+	return buf;
+}
+
+union ftl_md_vss *ftl_md_get_vss_buffer(struct ftl_md *md)
+{
+	return md->vss_data;
+}
+
+/*
+ * Releases the bounce buffers of the current IO procedure. They are allocated
+ * with spdk_zmalloc() in io_prepare(), hence spdk_free().
+ */
+static void
+io_cleanup(struct ftl_md *md)
+{
+	spdk_free(md->io.data);
+	md->io.data = NULL;
+
+	spdk_free(md->io.md);
+	md->io.md = NULL;
+}
+
+/*
+ * Thread message handler used when an operation could not be started: reports
+ * -EINVAL through the owner's callback and then releases the IO buffers.
+ *
+ * NOTE(review): the object is still accessed after the callback returns, so
+ * the callback must not destroy it. Also, when the failure was "IO already in
+ * progress" this cleanup releases the buffers of that in-flight IO - confirm
+ * callers never overlap operations on one object.
+ */
+static void
+exception(void *arg)
+{
+	struct ftl_md *md = arg;
+
+	md->cb(md->dev, md, -EINVAL);
+	io_cleanup(md);
+}
+
+/* Debug builds only: asserts that the first 'blocks' VSS entries carry the region's version. */
+static void
+audit_md_vss_version(struct ftl_md *md, uint64_t blocks)
+{
+#if defined(DEBUG)
+	union ftl_md_vss *vss = md->io.md;
+	while (blocks) {
+		blocks--;
+		assert(vss[blocks].version.md_version == md->region->current.version);
+	}
+#endif
+}
+
+/*
+ * Completion of one chunk of a restore/persist/clear procedure. On failure the
+ * status becomes -EAGAIN for a restore that can still fall back to the mirror
+ * and -EIO otherwise. On success a restored chunk is copied from the bounce
+ * buffers into the object's buffers and the IO cursor is advanced. In both
+ * cases io_submit() decides whether to continue or finish.
+ */
+static void
+read_write_blocks_cb(struct spdk_bdev_io *bdev_io, bool success, void *arg)
+{
+	struct ftl_md *md = arg;
+
+	if (spdk_unlikely(!success)) {
+		if (md->io.op == FTL_MD_OP_RESTORE && has_mirror(md)) {
+			md->io.status = -EAGAIN;
+		} else {
+			md->io.status = -EIO;
+		}
+	} else {
+		uint64_t blocks = bdev_io->u.bdev.num_blocks;
+		uint64_t size = blocks * FTL_BLOCK_SIZE;
+
+		if (md->io.op == FTL_MD_OP_RESTORE) {
+			memcpy(md->data + md->io.data_offset, md->io.data, size);
+
+			if (md->vss_data) {
+				uint64_t vss_offset = md->io.data_offset / FTL_BLOCK_SIZE;
+				vss_offset *= FTL_MD_VSS_SZ;
+				audit_md_vss_version(md, blocks);
+				memcpy(md->vss_data + vss_offset, md->io.md, blocks * FTL_MD_VSS_SZ);
+			}
+		}
+
+		md->io.address += blocks;
+		md->io.remaining -= blocks;
+		md->io.data_offset += size;
+	}
+
+	spdk_bdev_free_io(bdev_io);
+
+	io_submit(md);
+}
+
+/* Reads blocks, together with their separate metadata when md_buf is given. */
+static inline int
+read_blocks(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *desc,
+	    struct spdk_io_channel *ch,
+	    void *buf, void *md_buf,
+	    uint64_t offset_blocks, uint64_t num_blocks,
+	    spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	if (md_buf) {
+		return spdk_bdev_read_blocks_with_md(desc, ch, buf, md_buf,
+						     offset_blocks, num_blocks,
+						     cb, cb_arg);
+	} else {
+		return spdk_bdev_read_blocks(desc, ch, buf,
+					     offset_blocks, num_blocks,
+					     cb, cb_arg);
+	}
+}
+
+/* Writes blocks, together with their separate metadata when md_buf is given. */
+static inline int
+write_blocks(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *desc,
+	     struct spdk_io_channel *ch,
+	     void *buf, void *md_buf,
+	     uint64_t offset_blocks, uint64_t num_blocks,
+	     spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	if (md_buf) {
+		return spdk_bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks,
+						      num_blocks, cb, cb_arg);
+	} else {
+		return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
+	}
+}
+
+/*
+ * Submits the next chunk (at most ftl_md_xfer_blocks() blocks) of the current
+ * procedure. When the bdev layer is out of IO descriptors (-ENOMEM) the
+ * function re-queues itself through spdk_bdev_queue_io_wait(); any other
+ * submission error aborts.
+ */
+static void
+read_write_blocks(void *_md)
+{
+	struct ftl_md *md = _md;
+	const struct ftl_layout_region *region = md->region;
+	uint64_t blocks;
+	int rc = 0;
+
+	blocks = spdk_min(md->io.remaining, ftl_md_xfer_blocks(md->dev));
+
+	switch (md->io.op) {
+	case FTL_MD_OP_RESTORE:
+		rc = read_blocks(md->dev, region->bdev_desc, region->ioch,
+				 md->io.data, md->io.md,
+				 md->io.address, blocks,
+				 read_write_blocks_cb, md);
+		break;
+	case FTL_MD_OP_PERSIST:
+	case FTL_MD_OP_CLEAR:
+		rc = write_blocks(md->dev, region->bdev_desc, region->ioch,
+				  md->io.data, md->io.md,
+				  md->io.address, blocks,
+				  read_write_blocks_cb, md);
+		break;
+	default:
+		ftl_abort();
+	}
+
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(region->bdev_desc);
+			md->io.bdev_io_wait.bdev = bdev;
+			md->io.bdev_io_wait.cb_fn = read_write_blocks;
+			md->io.bdev_io_wait.cb_arg = md;
+			spdk_bdev_queue_io_wait(bdev, region->ioch, &md->io.bdev_io_wait);
+		} else {
+			ftl_abort();
+		}
+	}
+}
+
+/*
+ * Drives the procedure: finishes it when nothing remains or an error was
+ * recorded, otherwise stages the next chunk (for persist, copies data and VSS
+ * from the object's buffers into the bounce buffers) and submits it.
+ */
+static void
+io_submit(struct ftl_md *md)
+{
+	if (!md->io.remaining || md->io.status) {
+		io_done(md);
+		return;
+	}
+
+	if (md->io.op == FTL_MD_OP_PERSIST) {
+		uint64_t blocks = spdk_min(md->io.remaining, ftl_md_xfer_blocks(md->dev));
+
+		memcpy(md->io.data, md->data + md->io.data_offset, FTL_BLOCK_SIZE * blocks);
+
+		if (md->vss_data) {
+			uint64_t vss_offset = md->io.data_offset / FTL_BLOCK_SIZE;
+			vss_offset *= FTL_MD_VSS_SZ;
+			assert(md->io.md);
+			memcpy(md->io.md, md->vss_data + vss_offset, FTL_MD_VSS_SZ * blocks);
+			audit_md_vss_version(md, blocks);
+		}
+	}
+#if defined(DEBUG)
+	if (md->io.md && md->io.op == FTL_MD_OP_CLEAR) {
+		uint64_t blocks = spdk_min(md->io.remaining, ftl_md_xfer_blocks(md->dev));
+		audit_md_vss_version(md, blocks);
+	}
+#endif
+
+	read_write_blocks(md);
+}
+
+/*
+ * Checks that a new procedure may start: no procedure is in progress, a region
+ * is attached, and the region is not larger than the object's buffer.
+ * Returns 0 when the procedure may start, -EINVAL otherwise.
+ */
+static int
+io_can_start(struct ftl_md *md)
+{
+	assert(NULL == md->io.data);
+	if (NULL != md->io.data) {
+		/* Outgoing IO on metadata */
+		return -EINVAL;
+	}
+
+	if (!md->region) {
+		/* No device region to process data */
+		return -EINVAL;
+	}
+
+	if (md->region->current.blocks > md->data_blocks) {
+		/* The device region is larger than what this object describes */
+		FTL_ERRLOG(md->dev, "Blocks number mismatch between metadata object and "
+			   "device region\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocates the bounce buffers (data always, VSS when the object or its region
+ * uses VSS) and resets the IO cursor to the start of the region.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int
+io_prepare(struct ftl_md *md, enum ftl_md_ops op)
+{
+	const struct ftl_layout_region *region = md->region;
+	uint64_t data_size, meta_size = 0;
+
+	/* Allocates buffer for IO */
+	data_size = xfer_size(md);
+	md->io.data = spdk_zmalloc(data_size, FTL_BLOCK_SIZE, NULL,
+				   SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (!md->io.data) {
+		return -ENOMEM;
+	}
+
+	if (md->vss_data || md->region->vss_blksz) {
+		meta_size = ftl_md_xfer_blocks(md->dev) * FTL_MD_VSS_SZ;
+		md->io.md = spdk_zmalloc(meta_size, FTL_BLOCK_SIZE, NULL,
+					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+		if (!md->io.md) {
+			spdk_free(md->io.data);
+			md->io.data = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	md->io.address = region->current.offset;
+	md->io.remaining = region->current.blocks;
+	md->io.data_offset = 0;
+	md->io.status = 0;
+	md->io.op = op;
+
+	return 0;
+}
+
+/* Validates and prepares a procedure; returns 0, -EINVAL or -ENOMEM. */
+static int
+io_init(struct ftl_md *md, enum ftl_md_ops op)
+{
+	if (io_can_start(md)) {
+		return -EINVAL;
+	}
+
+	if (io_prepare(md, op)) {
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Block address of the given entry within the object's region. */
+static uint64_t
+persist_entry_lba(struct ftl_md *md, uint64_t start_entry)
+{
+	return md->region->current.offset + start_entry * md->region->entry_size;
+}
+
+/*
+ * Completion of a single entry write (primary or mirror). The user callback
+ * fires once every outstanding write has completed; any failure turns the
+ * reported status into -EIO.
+ */
+static void
+persist_entry_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct ftl_md_io_entry_ctx *ctx = cb_arg;
+
+	spdk_bdev_free_io(bdev_io);
+
+	assert(ctx->remaining > 0);
+	ctx->remaining--;
+
+	if (!success) {
+		ctx->status = -EIO;
+	}
+
+	if (!ctx->remaining) {
+		ctx->cb(ctx->status, ctx->cb_arg);
+	}
+}
+
+/*
+ * Writes the entry described by ctx into md's region. On -ENOMEM retry_fn is
+ * queued to run when the bdev has IO descriptors again; other submission
+ * errors abort. Returns the submission result.
+ */
+static int
+ftl_md_persist_entry_write_blocks(struct ftl_md_io_entry_ctx *ctx, struct ftl_md *md,
+				  spdk_bdev_io_wait_cb retry_fn)
+{
+	int rc;
+
+	rc = write_blocks(md->dev, md->region->bdev_desc, md->region->ioch,
+			  ctx->buffer, ctx->vss_buffer,
+			  persist_entry_lba(md, ctx->start_entry), md->region->entry_size,
+			  persist_entry_cb, ctx);
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(md->region->bdev_desc);
+			ctx->bdev_io_wait.bdev = bdev;
+			ctx->bdev_io_wait.cb_fn = retry_fn;
+			ctx->bdev_io_wait.cb_arg = ctx;
+			spdk_bdev_queue_io_wait(bdev, md->region->ioch, &ctx->bdev_io_wait);
+		} else {
+			ftl_abort();
+		}
+	}
+
+	return rc;
+}
+
+/* Writes the entry to the mirror region; retries itself on -ENOMEM. */
+static void
+ftl_md_persist_entry_mirror(void *_ctx)
+{
+	struct ftl_md_io_entry_ctx *ctx = _ctx;
+
+	ftl_md_persist_entry_write_blocks(ctx, ctx->md->mirror, ftl_md_persist_entry_mirror);
+}
+
+/*
+ * Writes the entry to the primary region and, once that write was submitted,
+ * to the mirror as well. If the primary submission is deferred (-ENOMEM) this
+ * function runs again later and handles the mirror then.
+ */
+static void
+ftl_md_persist_entry_primary(void *_ctx)
+{
+	struct ftl_md_io_entry_ctx *ctx = _ctx;
+	struct ftl_md *md = ctx->md;
+	int rc;
+
+	rc = ftl_md_persist_entry_write_blocks(ctx, md, ftl_md_persist_entry_primary);
+
+	if (!rc && has_mirror(md)) {
+		assert(md->region->entry_size == md->mirror->region->entry_size);
+
+		/*
+		 * The MD object has mirror so execute persist on it too. Account for
+		 * the mirror write before issuing it so the outstanding counter never
+		 * lags behind the number of submitted writes.
+		 */
+		ctx->remaining++;
+		ftl_md_persist_entry_mirror(ctx);
+	}
+}
+
+/* (Re)starts an entry persist: resets the status and outstanding counter. */
+static void
+_ftl_md_persist_entry(struct ftl_md_io_entry_ctx *ctx)
+{
+	ctx->status = 0;
+	ctx->remaining = 1;
+
+	/* First execute an IO to the primary region */
+	ftl_md_persist_entry_primary(ctx);
+}
+
+void
+ftl_md_persist_entry(struct ftl_md *md, uint64_t start_entry, void *buffer, void *vss_buffer,
+		     ftl_md_io_entry_cb cb, void *cb_arg,
+		     struct ftl_md_io_entry_ctx *ctx)
+{
+	if (spdk_unlikely(0 == md->region->entry_size)) {
+		/* This MD has not been configured to support persist entry call */
+		ftl_abort();
+	}
+
+	/* Initialize persist entry context */
+	ctx->cb = cb;
+	ctx->cb_arg = cb_arg;
+	ctx->md = md;
+	ctx->start_entry = start_entry;
+	ctx->buffer = buffer;
+	/* Fall back to the object's default entry VSS buffer when none is given */
+	ctx->vss_buffer = vss_buffer ? : md->entry_vss_dma_buf;
+
+	_ftl_md_persist_entry(ctx);
+}
+
+/*
+ * Completion of an entry read. A failed read is retried from the mirror when
+ * one is available; otherwise -EIO is reported. A successful read reports the
+ * status stored in the context (0, as set by ftl_md_read_entry()).
+ */
+static void
+read_entry_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct ftl_md_io_entry_ctx *ctx = cb_arg;
+	struct ftl_md *md = ctx->md;
+
+	spdk_bdev_free_io(bdev_io);
+
+	if (!success) {
+		if (has_mirror(md)) {
+			if (setup_mirror(md)) {
+				/* An error when setup the mirror */
+				ctx->status = -EIO;
+				goto finish_io;
+			}
+
+			/* The primary failed, retry the read from the mirror */
+			ftl_md_read_entry(md->mirror, ctx->start_entry, ctx->buffer, ctx->vss_buffer,
+					  ctx->cb, ctx->cb_arg,
+					  ctx);
+			return;
+		} else {
+			ctx->status = -EIO;
+			goto finish_io;
+		}
+	}
+
+finish_io:
+	ctx->cb(ctx->status, ctx->cb_arg);
+}
+
+/*
+ * Reads the entry described by ctx from md's region. On -ENOMEM retry_fn is
+ * queued to run when the bdev has IO descriptors again; other submission
+ * errors abort.
+ */
+static void
+ftl_md_read_entry_read_blocks(struct ftl_md_io_entry_ctx *ctx, struct ftl_md *md,
+			      spdk_bdev_io_wait_cb retry_fn)
+{
+	int rc;
+
+	rc = read_blocks(md->dev, md->region->bdev_desc, md->region->ioch,
+			 ctx->buffer, ctx->vss_buffer,
+			 persist_entry_lba(md, ctx->start_entry), md->region->entry_size,
+			 read_entry_cb, ctx);
+
+	if (spdk_unlikely(rc)) {
+		if (rc == -ENOMEM) {
+			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(md->region->bdev_desc);
+			ctx->bdev_io_wait.bdev = bdev;
+			ctx->bdev_io_wait.cb_fn = retry_fn;
+			ctx->bdev_io_wait.cb_arg = ctx;
+			spdk_bdev_queue_io_wait(bdev, md->region->ioch, &ctx->bdev_io_wait);
+		} else {
+			ftl_abort();
+		}
+	}
+}
+
+static void
+_ftl_md_read_entry(void *_ctx)
+{
+	struct ftl_md_io_entry_ctx *ctx = _ctx;
+
+	ftl_md_read_entry_read_blocks(ctx, ctx->md, _ftl_md_read_entry);
+}
+
+void
+ftl_md_read_entry(struct ftl_md *md, uint64_t start_entry, void *buffer, void *vss_buffer,
+		  ftl_md_io_entry_cb cb, void *cb_arg,
+		  struct ftl_md_io_entry_ctx *ctx)
+{
+	if (spdk_unlikely(0 == md->region->entry_size)) {
+		/* This MD has not been configured to support read entry call */
+		ftl_abort();
+	}
+
+	ctx->cb = cb;
+	ctx->cb_arg = cb_arg;
+	ctx->md = md;
+	ctx->start_entry = start_entry;
+	ctx->buffer = buffer;
+	ctx->vss_buffer = vss_buffer;
+	/* read_entry_cb() reports this value on success, so it must start at 0 */
+	ctx->status = 0;
+
+	_ftl_md_read_entry(ctx);
+}
+
+void
+ftl_md_persist_entry_retry(struct ftl_md_io_entry_ctx *ctx)
+{
+	_ftl_md_persist_entry(ctx);
+}
+
+/*
+ * Called when persisting the mirror finished. On error the whole persist is
+ * finished with that error; otherwise the primary is persisted next.
+ */
+static void
+persist_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
+{
+	struct ftl_md *primary = md->owner.private;
+
+	if (status) {
+		/* We got an error, stop persist procedure immediately */
+		primary->io.status = status;
+		io_done(primary);
+	} else {
+		/* Now continue the persist procedure on the primary MD object */
+		if (0 == io_init(primary, FTL_MD_OP_PERSIST)) {
+			io_submit(primary);
+		} else {
+			spdk_thread_send_msg(spdk_get_thread(), exception, primary);
+		}
+	}
+}
+
+/*
+ * Persists the whole object. With a mirror, the mirror is written first and
+ * the primary follows from persist_mirror_cb(). Completion (or a failure to
+ * start) is reported through md->cb.
+ */
+void
+ftl_md_persist(struct ftl_md *md)
+{
+	if (has_mirror(md)) {
+		if (setup_mirror(md)) {
+			/* An error when setup the mirror */
+			spdk_thread_send_msg(spdk_get_thread(), exception, md);
+			return;
+		}
+
+		/* Set callback and context in mirror */
+		md->mirror->cb = persist_mirror_cb;
+		md->mirror->owner.private = md;
+
+		/* First persist the mirror */
+		ftl_md_persist(md->mirror);
+		return;
+	}
+
+	if (0 == io_init(md, FTL_MD_OP_PERSIST)) {
+		io_submit(md);
+	} else {
+		spdk_thread_send_msg(spdk_get_thread(), exception, md);
+	}
+}
+
+/* Called when restoring from the mirror (after the primary failed) finished. */
+static void
+restore_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
+{
+	struct ftl_md *primary = md->owner.private;
+
+	if (status) {
+		/* Cannot restore the object from the mirror too, mark error and fail */
+		primary->io.status = -EIO;
+		io_done(primary);
+	} else {
+		/*
+		 * Restoring from the mirror successful. Synchronize mirror to the primary.
+		 * Because we read MD content from the mirror, we can disable it, only the primary
+		 * requires persisting.
+		 */
+		primary->io.status = 0;
+		primary->mirror_enabled = false;
+		io_cleanup(primary);
+		ftl_md_persist(primary);
+		primary->mirror_enabled = true;
+	}
+}
+
+/*
+ * Post-processing of a finished restore. Returns the final status, or -EAGAIN
+ * when a restore from the mirror has been started and the procedure is
+ * therefore not finished yet.
+ */
+static int
+restore_done(struct ftl_md *md)
+{
+	if (-EAGAIN == md->io.status) {
+		/* Failed to read MD from primary region, try it from mirror.
+		 * At the moment read the mirror entirely, (TODO) in the
+		 * future we can restore from primary and mirror region
+		 * with finer granularity.
+		 */
+
+		if (has_mirror(md)) {
+			if (setup_mirror(md)) {
+				/* An error when setup the mirror */
+				return -EIO;
+			}
+
+			/* Set callback and context in mirror */
+			md->mirror->cb = restore_mirror_cb;
+			md->mirror->owner.private = md;
+
+			/* Restore from the mirror */
+			ftl_md_restore(md->mirror);
+			return -EAGAIN;
+		} else {
+			return -EIO;
+		}
+	}
+
+	return md->io.status;
+}
+
+/*
+ * Finishes the current procedure: unless a mirror restore took over (-EAGAIN),
+ * the owner's callback is invoked and the bounce buffers are released
+ * afterwards. The buffers stay valid during the callback; clear_mirror_cb()
+ * relies on that.
+ */
+static void
+io_done(struct ftl_md *md)
+{
+	int status;
+
+	if (md->io.op == FTL_MD_OP_RESTORE) {
+		status = restore_done(md);
+	} else {
+		status = md->io.status;
+	}
+
+	if (status != -EAGAIN) {
+		md->cb(md->dev, md, status);
+		io_cleanup(md);
+	}
+}
+
+/*
+ * Restores the whole object from its region; on a read error with a mirror
+ * available the mirror is used instead (see restore_done()). Completion is
+ * reported through md->cb.
+ */
+void
+ftl_md_restore(struct ftl_md *md)
+{
+	if (0 == io_init(md, FTL_MD_OP_RESTORE)) {
+		io_submit(md);
+	} else {
+		spdk_thread_send_msg(spdk_get_thread(), exception, md);
+	}
+}
+
+/*
+ * Fills the bounce buffers for a clear: every data byte is set to
+ * data_pattern, and every VSS entry (if a VSS bounce buffer exists) to
+ * *vss_pattern or, without a pattern, to zeros plus the region's version.
+ * Always returns 0.
+ */
+static int
+pattern_prepare(struct ftl_md *md,
+		int data_pattern, union ftl_md_vss *vss_pattern)
+{
+	void *data = md->io.data;
+	uint64_t data_size = xfer_size(md);
+
+	memset(data, data_pattern, data_size);
+
+	if (md->io.md) {
+		if (vss_pattern) {
+			/* store the VSS pattern... */
+			ftl_md_vss_buf_init(md->io.md, ftl_md_xfer_blocks(md->dev), vss_pattern);
+		} else {
+			/* ...or default init VSS to 0 */
+			union ftl_md_vss vss = {0};
+
+			vss.version.md_version = md->region->current.version;
+			ftl_md_vss_buf_init(md->io.md, ftl_md_xfer_blocks(md->dev), &vss);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Called when clearing the mirror finished. On success the primary is cleared
+ * with the same patterns, which are read back from the mirror's bounce buffers
+ * (still allocated while this callback runs).
+ */
+static void
+clear_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *secondary, int status)
+{
+	struct ftl_md *primary = secondary->owner.private;
+
+	if (status) {
+		/* We got an error, stop clear procedure immediately */
+		primary->io.status = status;
+		io_done(primary);
+	} else {
+		/* Now continue the clear procedure on the primary MD object */
+		if (0 == io_init(primary, FTL_MD_OP_CLEAR) &&
+		    0 == pattern_prepare(primary, *(int *)secondary->io.data,
+					 secondary->io.md)) {
+			io_submit(primary);
+		} else {
+			spdk_thread_send_msg(spdk_get_thread(), exception, primary);
+		}
+	}
+}
+
+/*
+ * Overwrites the whole region with the given patterns. With a mirror, the
+ * mirror is cleared first and the primary follows from clear_mirror_cb().
+ * Completion (or a failure to start) is reported through md->cb.
+ */
+void
+ftl_md_clear(struct ftl_md *md, int data_pattern, union ftl_md_vss *vss_pattern)
+{
+	if (has_mirror(md)) {
+		if (setup_mirror(md)) {
+			/* An error when setup the mirror */
+			spdk_thread_send_msg(spdk_get_thread(), exception, md);
+			return;
+		}
+
+		/* Set callback and context in mirror */
+		md->mirror->cb = clear_mirror_cb;
+		md->mirror->owner.private = md;
+
+		/* First clear the mirror */
+		ftl_md_clear(md->mirror, data_pattern, vss_pattern);
+		return;
+	}
+
+	if (0 == io_init(md, FTL_MD_OP_CLEAR) && 0 == pattern_prepare(md, data_pattern, vss_pattern)) {
+		io_submit(md);
+	} else {
+		spdk_thread_send_msg(spdk_get_thread(), exception, md);
+	}
+}
+
+const struct ftl_layout_region *
+ftl_md_get_region(struct ftl_md *md)
+{
+	return md->region;
+}
+
+/*
+ * Attaches a region to the object. When the object has a VSS area, all its
+ * entries (and the per-entry VSS DMA buffer, for regions with an entry size)
+ * are reset to zeros plus the region's version. The mirror is set up when the
+ * region has one. Returns 0 or the error from setup_mirror().
+ */
+int
+ftl_md_set_region(struct ftl_md *md,
+		  const struct ftl_layout_region *region)
+{
+	assert(region->current.blocks <= md->data_blocks);
+	md->region = region;
+
+	if (md->vss_data) {
+		union ftl_md_vss vss = {0};
+		vss.version.md_version = region->current.version;
+		ftl_md_vss_buf_init(md->vss_data, md->data_blocks, &vss);
+		if (region->entry_size) {
+			assert(md->entry_vss_dma_buf);
+			ftl_md_vss_buf_init(md->entry_vss_dma_buf, region->entry_size, &vss);
+		}
+	}
+
+	if (has_mirror(md)) {
+		return setup_mirror(md);
+	}
+
+	return 0;
+}
diff --git a/lib/ftl/utils/ftl_md.h b/lib/ftl/utils/ftl_md.h
new file mode 100644
index 000000000..7d5a77c20
--- /dev/null
+++ b/lib/ftl/utils/ftl_md.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ */
+#ifndef FTL_MD_H
+#define FTL_MD_H
+
+#include "spdk/stdinc.h"
+
+#include "ftl_layout.h"
+
+struct ftl_md;
+struct spdk_ftl_dev;
+
+typedef void (*ftl_md_cb)(struct spdk_ftl_dev *dev, struct ftl_md *md, int status);
+
+enum ftl_md_ops {
+	FTL_MD_OP_RESTORE,
+	FTL_MD_OP_PERSIST,
+	FTL_MD_OP_CLEAR,
+};
+
+/* FTL metadata container which allows to store/restore/recover */
+struct ftl_md {
+	/* Context of owner (Caller of restore/persist/clear operation) */
+	struct {
+		/* Private context of the metadata's owner */
+		void *private;
+
+		/* Additional context of the owner */
+		void *cb_ctx;
+	} owner;
+
+	/* Callback for signaling end of procedures like restore, persist, or clear */
+	ftl_md_cb cb;
+
+	/* Pointer to the FTL device */
+	struct spdk_ftl_dev *dev;
+
+	/* Region of device on which store/restore the metadata */
+	const struct ftl_layout_region *region;
+
+	/* Pointer to data */
+	void *data;
+
+	/* Size of buffer in FTL block size unit */
+	uint64_t data_blocks;
+
+	/* Pointer to VSS metadata data */
+	void *vss_data;
+
+	/* Default DMA buffer for VSS of a single entry. Used by ftl_md_persist_entry(). */
+	void *entry_vss_dma_buf;
+
+	/* Fields for doing IO */
+	struct {
+		void *data;
+		void *md;
+		uint64_t address;
+		uint64_t remaining;
+		uint64_t data_offset;
+		int status;
+		enum ftl_md_ops op;
+		struct spdk_bdev_io_wait_entry bdev_io_wait;
+	} io;
+
+	/* Metadata mirror (secondary) object; shares the data buffers of this object */
+	struct ftl_md *mirror;
+
+	/* This flag is used by the primary to disable mirror temporarily */
+	bool mirror_enabled;
+};
+
+typedef void (*ftl_md_io_entry_cb)(int status, void *cb_arg);
+
+struct ftl_md_io_entry_ctx {
+	uint32_t remaining;
+	int status;
+	ftl_md_io_entry_cb cb;
+	void *cb_arg;
+	struct ftl_md *md;
+	uint64_t start_entry;
+	void *buffer;
+	void *vss_buffer;
+	struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+#define FTL_MD_VSS_SZ	64
+union ftl_md_vss {
+	struct {
+		uint8_t		unused[FTL_MD_VSS_SZ - sizeof(uint64_t)];
+		uint64_t	md_version;
+	} version;
+
+	struct {
+		uint64_t	start_lba;
+		uint64_t	num_blocks;
+	} unmap;
+
+	struct {
+		uint64_t	lba;
+	} nv_cache;
+};
+
+SPDK_STATIC_ASSERT(sizeof(union ftl_md_vss) == FTL_MD_VSS_SZ, "Invalid md vss size");
+
+/**
+ * @brief Creates FTL metadata
+ *
+ * @param dev The FTL device
+ * @param blocks Size of buffer in FTL block size unit
+ * @param vss_blksz Size of VSS MD
+ * @param name Name of the object being created
+ * @param no_mem If true metadata will be created without memory allocation
+ * @param region Region associated with FTL metadata
+ *
+ * @note if no_mem is false, the data buffer is allocated internally by the object
+ *
+ * @return FTL metadata, or NULL on failure
+ */
+struct ftl_md *ftl_md_create(struct spdk_ftl_dev *dev, uint64_t blocks,
+			     uint64_t vss_blksz, const char *name, bool no_mem,
+			     const struct ftl_layout_region *region);
+
+/**
+ * @brief Destroys metadata
+ *
+ * @param md Metadata to be destroyed
+ */
+void ftl_md_destroy(struct ftl_md *md);
+
+/**
+ * @brief Free the data buf associated with the metadata
+ *
+ * @param md Metadata object
+ */
+void ftl_md_free_buf(struct ftl_md *md);
+
+/**
+ * @brief Sets 
the region of a device on which to perform IO when persisting,
+ * restoring, or clearing.
+ *
+ * @param md The FTL metadata
+ * @param region The device region to be set
+ *
+ * @return Operation status
+ */
+int ftl_md_set_region(struct ftl_md *md,
+		      const struct ftl_layout_region *region);
+
+/**
+ * @brief Gets the layout region on which the IO procedures of the metadata are executed
+ *
+ * @param md Metadata object
+ *
+ * @return Layout region
+ */
+const struct ftl_layout_region *ftl_md_get_region(struct ftl_md *md);
+
+/**
+ * @brief Gets metadata's data buffer
+ *
+ * @param md The FTL metadata
+ *
+ * @return FTL metadata data buffer
+ */
+void *ftl_md_get_buffer(struct ftl_md *md);
+
+/**
+ * @brief Gets the size of the metadata object's data buffer
+ *
+ * @param md The FTL metadata
+ *
+ * @return Buffer size in bytes
+ */
+uint64_t ftl_md_get_buffer_size(struct ftl_md *md);
+
+/**
+ * @brief Allocate and initialize a VSS buffer for an MD region.
+ *
+ * The buffer is aligned to FTL_BLOCK_SIZE.
+ * The buffer is zeroed.
+ * The VSS version is inherited from the MD region.
+ *
+ * @param region The MD region
+ * @param count Number of VSS items to allocate
+ *
+ * @return VSS buffer, or NULL when the allocation fails
+ */
+union ftl_md_vss *ftl_md_vss_buf_alloc(struct ftl_layout_region *region, uint32_t count);
+
+/**
+ * @brief Get the VSS metadata data buffer
+ *
+ * @param md The FTL metadata
+ *
+ * @return VSS metadata data buffer
+ */
+union ftl_md_vss *ftl_md_get_vss_buffer(struct ftl_md *md);
+
+/**
+ * Restores metadata from the region which is set
+ *
+ * The result is reported through the callback stored in the metadata object.
+ *
+ * @param md Metadata to be restored
+ */
+void ftl_md_restore(struct ftl_md *md);
+
+/**
+ * Persists all metadata to the region which is set
+ *
+ * The result is reported through the callback stored in the metadata object.
+ *
+ * @param md Metadata to be persisted
+ */
+void ftl_md_persist(struct ftl_md *md);
+
+/**
+ * Persists given entries in metadata to the region which is set
+ *
+ * @param md Metadata to be persisted
+ * @param start_entry Starting index of entry to be persisted
+ * @param buffer DMA buffer for writing the entry to the device
+ * @param vss_buffer DMA buffer for writing the entry VSS to the device; when NULL,
+ * the default entry VSS buffer of the metadata object is used
+ * @param cb Completion called on persist entry end
+ * @param cb_arg Context returned on completion
+ * @param ctx Operation context structure
+ */
+void ftl_md_persist_entry(struct ftl_md *md, uint64_t start_entry, void *buffer, void *vss_buffer,
+			  ftl_md_io_entry_cb cb, void *cb_arg,
+			  struct ftl_md_io_entry_ctx *ctx);
+
+/**
+ * Retries a persist operation performed by ftl_md_persist_entry.
+ *
+ * @param ctx Operation context structure.
+ */
+void ftl_md_persist_entry_retry(struct ftl_md_io_entry_ctx *ctx);
+
+/**
+ * Reads given entries from metadata region
+ *
+ * @param md Metadata to be read
+ * @param start_entry Starting index of entry to be read
+ * @param buffer DMA buffer for reading the entry from the device
+ * @param vss_buffer DMA buffer for reading the entry VSS from the device
+ * @param cb Completion called on read entry end
+ * @param cb_arg Context returned on completion
+ * @param ctx Operation context structure
+ */
+void ftl_md_read_entry(struct ftl_md *md, uint64_t start_entry, void *buffer, void *vss_buffer,
+		       ftl_md_io_entry_cb cb, void *cb_arg, struct ftl_md_io_entry_ctx *ctx);
+
+/**
+ * @brief Clears metadata on the region which is set
+ *
+ * @param md Metadata to be cleared
+ * @param pattern Byte value used to fill the metadata
+ * @param vss_pattern Pattern used to initialize metadata VSS; when NULL, the VSS is
+ * zeroed and only its version field is set from the region
+ *
+ * @note size of pattern needs to be aligned to FTL device transfer size
+ */
+void ftl_md_clear(struct ftl_md *md, int pattern, union ftl_md_vss *vss_pattern);
+
+/**
+ * @brief Gets the number of blocks that are transferred in a single IO operation
+ *
+ * @param dev The FTL device
+ *
+ * @return Number of blocks
+ */
+uint64_t ftl_md_xfer_blocks(struct spdk_ftl_dev *dev);
+
+#endif /* FTL_MD_H */