From ef2e614cefe80264a1ca6e330d0f70739be69907 Mon Sep 17 00:00:00 2001
From: Wojciech Malikowski
Date: Mon, 29 Oct 2018 08:17:34 -0400
Subject: [PATCH] ftl: Initial implementation

This patch adds the core FTL (flash translation layer) submodules.
On regular SSDs the FTL is part of the firmware, whereas Open Channel
allows it to be moved to the host, giving full control over data
placement on the device.

Main functionalities added:
 * logical to physical address map
 * read / write buffer cache
 * wear-leveling
 * bad block management

Change-Id: I5c28aa277b212734bd4b1f71ae386b3d6f8c3715
Signed-off-by: Konrad Sztyber
Signed-off-by: Wojciech Malikowski
Signed-off-by: Jakub Radtke
Signed-off-by: Mateusz Kozlowski
Reviewed-on: https://review.gerrithub.io/c/431322
Tested-by: SPDK CI Jenkins
Chandler-Test-Pool: SPDK Automated Test System
Reviewed-by: Ben Walker
Reviewed-by: Jim Harris
---
 CONFIG                         |    3 +
 configure                      |    6 +
 lib/Makefile                   |    1 +
 lib/ftl/Makefile               |   40 +
 lib/ftl/ftl_band.c             |  969 +++++++++++++++++++++
 lib/ftl/ftl_band.h             |  253 ++++++
 lib/ftl/ftl_core.c             | 1470 ++++++++++++++++++++++++++++++++
 lib/ftl/ftl_core.h             |    2 +-
 lib/ftl/ftl_debug.c            |  163 ++++
 lib/ftl/ftl_debug.h            |   75 ++
 lib/ftl/ftl_io.c               |  371 ++++++++
 lib/ftl/ftl_io.h               |    9 +-
 lib/ftl/ftl_ppa.h              |    2 +-
 lib/ftl/ftl_rwb.c              |  461 ++++++++++
 lib/ftl/ftl_rwb.h              |  162 ++++
 mk/spdk.modules.mk             |    4 +
 test/common/autotest_common.sh |    5 +
 17 files changed, 3989 insertions(+), 7 deletions(-)
 create mode 100644 lib/ftl/Makefile
 create mode 100644 lib/ftl/ftl_band.c
 create mode 100644 lib/ftl/ftl_band.h
 create mode 100644 lib/ftl/ftl_core.c
 create mode 100644 lib/ftl/ftl_debug.c
 create mode 100644 lib/ftl/ftl_debug.h
 create mode 100644 lib/ftl/ftl_io.c
 create mode 100644 lib/ftl/ftl_rwb.c
 create mode 100644 lib/ftl/ftl_rwb.h

diff --git a/CONFIG b/CONFIG
index 3104212a2..31bd63422 100644
--- a/CONFIG
+++ b/CONFIG
@@ -118,3 +118,6 @@ CONFIG_VTUNE_DIR=

 # Build the dpdk igb_uio driver
 CONFIG_IGB_UIO_DRIVER=n
+
+# Build FTL library
+CONFIG_FTL=n
diff --git a/configure b/configure
index cba938dde..c8867b66b 100755
--- a/configure
+++ b/configure
@@ -263,6 +263,12 @@ for i in "$@"; do
 		--without-igb-uio-driver)
 			CONFIG[IGB_UIO_DRIVER]=n
 			;;
+		--with-ftl)
+			CONFIG[FTL]=y
+			;;
+		--without-ftl)
+			CONFIG[FTL]=n
+			;;
 		--)
 			break
 			;;
diff --git a/lib/Makefile b/lib/Makefile
index 5582ef6ab..40c2df08d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -44,6 +44,7 @@ DIRS-$(CONFIG_VIRTIO) += virtio
 endif

 DIRS-$(CONFIG_REDUCE) += reduce
+DIRS-$(CONFIG_FTL) += ftl

 # If CONFIG_ENV is pointing at a directory in lib, build it.
 # Out-of-tree env implementations must be built separately by the user.
diff --git a/lib/ftl/Makefile b/lib/ftl/Makefile
new file mode 100644
index 000000000..e64a7d85f
--- /dev/null
+++ b/lib/ftl/Makefile
@@ -0,0 +1,40 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = ftl_band.c ftl_core.c ftl_debug.c ftl_io.c ftl_rwb.c +LIBNAME = ftl + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/ftl/ftl_band.c b/lib/ftl/ftl_band.c new file mode 100644 index 000000000..575b1de67 --- /dev/null +++ b/lib/ftl/ftl_band.c @@ -0,0 +1,969 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/crc32.h" +#include "spdk/likely.h" +#include "spdk/util.h" +#include "spdk/ftl.h" + +#include "ftl_band.h" +#include "ftl_io.h" +#include "ftl_core.h" +#include "ftl_debug.h" + +/* TODO: define some signature for meta version */ +#define FTL_MD_VER 1 + +struct __attribute__((packed)) ftl_md_hdr { + /* Device instance */ + struct spdk_uuid uuid; + + /* Meta version */ + uint8_t ver; + + /* Sequence number */ + uint64_t seq; + + /* CRC32 checksum */ + uint32_t checksum; +}; + +/* End metadata layout stored on media (with all three being aligned to block size): */ +/* - header */ +/* - valid bitmap */ +/* - LBA map */ +struct __attribute__((packed)) ftl_tail_md { + struct ftl_md_hdr hdr; + + /* Max number of lbks */ + uint64_t num_lbks; + + uint8_t reserved[4059]; +}; +SPDK_STATIC_ASSERT(sizeof(struct ftl_tail_md) == FTL_BLOCK_SIZE, "Incorrect metadata size"); + +struct __attribute__((packed)) ftl_head_md { + struct ftl_md_hdr hdr; + + /* Number of defrag cycles */ + uint64_t wr_cnt; + + /* Number of surfaced LBAs */ + uint64_t lba_cnt; + + /* Transfer size */ + uint32_t xfer_size; +}; + +size_t +ftl_tail_md_hdr_num_lbks(void) +{ + return spdk_divide_round_up(sizeof(struct ftl_tail_md), FTL_BLOCK_SIZE); +} + +size_t +ftl_vld_map_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_vld_map_size(dev), FTL_BLOCK_SIZE); +} + +size_t +ftl_lba_map_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_num_band_lbks(dev) * sizeof(uint64_t), FTL_BLOCK_SIZE); +} + +size_t +ftl_head_md_num_lbks(const struct spdk_ftl_dev *dev) +{ + return dev->xfer_size; +} + +size_t +ftl_tail_md_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_tail_md_hdr_num_lbks() + + ftl_vld_map_num_lbks(dev) + + ftl_lba_map_num_lbks(dev), + dev->xfer_size) * dev->xfer_size; +} + +static uint64_t +ftl_band_tail_md_offset(struct ftl_band *band) +{ + return ftl_band_num_usable_lbks(band) - + ftl_tail_md_num_lbks(band->dev); +} + +int +ftl_band_full(struct ftl_band *band, size_t offset) +{ + return offset == ftl_band_tail_md_offset(band); +} + +void +ftl_band_write_failed(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + + band->high_prio = 1; + band->tail_md_ppa = ftl_to_ppa(FTL_PPA_INVALID); + + if (!dev->df_band) { + dev->df_band = band; + } + + ftl_band_set_state(band, FTL_BAND_STATE_CLOSED); +} + +void +ftl_band_clear_md(struct ftl_band *band) +{ + spdk_bit_array_clear_mask(band->md.vld_map); + memset(band->md.lba_map, 0, ftl_num_band_lbks(band->dev) * sizeof(uint64_t)); + band->md.num_vld = 0; +} + +static void +ftl_band_free_md(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + assert(band->state == FTL_BAND_STATE_CLOSED || + band->state == FTL_BAND_STATE_FREE); + assert(md->ref_cnt == 0); + assert(md->lba_map != NULL); + assert(!band->high_prio); + + /* Verify that band's metadata is consistent with l2p */ + if (band->num_chunks) { + assert(ftl_band_validate_md(band, band->md.lba_map) == true); + } + + spdk_mempool_put(dev->lba_pool, md->lba_map); + md->lba_map = NULL; +} + +static void +_ftl_band_set_free(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_band *lband, *prev; + + /* Verify band's previous state */ + assert(band->state == FTL_BAND_STATE_CLOSED); + + if (band == dev->df_band) { + dev->df_band = NULL; + } + + /* Remove the band from the closed band list */ + LIST_REMOVE(band, list_entry); + + /* Keep the list sorted by 
band's write count */ + LIST_FOREACH(lband, &dev->free_bands, list_entry) { + if (lband->md.wr_cnt > band->md.wr_cnt) { + LIST_INSERT_BEFORE(lband, band, list_entry); + break; + } + prev = lband; + } + + if (!lband) { + if (LIST_EMPTY(&dev->free_bands)) { + LIST_INSERT_HEAD(&dev->free_bands, band, list_entry); + } else { + LIST_INSERT_AFTER(prev, band, list_entry); + } + } + +#if defined(DEBUG) + prev = NULL; + LIST_FOREACH(lband, &dev->free_bands, list_entry) { + if (!prev) { + continue; + } + assert(prev->md.wr_cnt <= lband->md.wr_cnt); + } +#endif + dev->num_free++; + ftl_apply_limits(dev); +} + +static void +_ftl_band_set_opening(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + /* Verify band's previous state */ + assert(band->state == FTL_BAND_STATE_PREP); + LIST_REMOVE(band, list_entry); + + md->wr_cnt++; + + assert(dev->num_free > 0); + dev->num_free--; + + ftl_apply_limits(dev); +} + +static void +_ftl_band_set_closed(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_chunk *chunk; + + /* TODO: add this kind of check in band_set_state() */ + if (band->state == FTL_BAND_STATE_CLOSED) { + return; + } + + /* Set the state as free_md() checks for that */ + band->state = FTL_BAND_STATE_CLOSED; + + /* Free the md if there are no outstanding IOs */ + ftl_band_release_md(band); + + if (spdk_likely(band->num_chunks)) { + LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry); + CIRCLEQ_FOREACH(chunk, &band->chunks, circleq) { + chunk->state = FTL_CHUNK_STATE_CLOSED; + } + } else { + LIST_REMOVE(band, list_entry); + } +} + +static uint32_t +ftl_md_calc_crc(const struct ftl_md_hdr *hdr, size_t size) +{ + size_t checkoff = offsetof(struct ftl_md_hdr, checksum); + size_t mdoff = checkoff + sizeof(hdr->checksum); + uint32_t crc; + + crc = spdk_crc32c_update(hdr, checkoff, 0); + return spdk_crc32c_update((const char *)hdr + mdoff, size - mdoff, crc); +} + +static void +ftl_set_md_hdr(struct spdk_ftl_dev *dev, struct ftl_md_hdr *hdr, + struct ftl_md *md, size_t size) +{ + hdr->seq = md->seq; + hdr->ver = FTL_MD_VER; + hdr->uuid = dev->uuid; + hdr->checksum = ftl_md_calc_crc(hdr, size); +} + +static int +ftl_pack_head_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_head_md *head = data; + + head->wr_cnt = md->wr_cnt; + head->lba_cnt = dev->num_lbas; + head->xfer_size = dev->xfer_size; + ftl_set_md_hdr(dev, &head->hdr, md, sizeof(struct ftl_head_md)); + + return FTL_MD_SUCCESS; +} + +static int +ftl_pack_tail_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_tail_md *tail = data; + size_t map_size; + void *vld_offset, *map_offset; + + map_size = ftl_num_band_lbks(dev) * sizeof(uint64_t); + vld_offset = (char *)data + ftl_tail_md_hdr_num_lbks() * FTL_BLOCK_SIZE; + map_offset = (char *)vld_offset + ftl_vld_map_num_lbks(dev) * FTL_BLOCK_SIZE; + + /* Clear out the buffer */ + memset(data, 0, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + tail->num_lbks = ftl_num_band_lbks(dev); + + pthread_spin_lock(&md->lock); + spdk_bit_array_store_mask(md->vld_map, vld_offset); + pthread_spin_unlock(&md->lock); + + memcpy(map_offset, md->lba_map, map_size); + ftl_set_md_hdr(dev, &tail->hdr, md, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + + return FTL_MD_SUCCESS; +} + +static int +ftl_md_hdr_vld(struct spdk_ftl_dev *dev, const struct ftl_md_hdr *hdr, size_t size) +{ + if (spdk_uuid_compare(&dev->uuid, &hdr->uuid) != 0) { + return FTL_MD_NO_MD; + } + + if (hdr->ver != FTL_MD_VER) { + 
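+		/* Stored metadata was written with a different on-media layout version */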
return FTL_MD_INVALID_VER; + } + + if (ftl_md_calc_crc(hdr, size) != hdr->checksum) { + return FTL_MD_INVALID_CRC; + } + + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_tail_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_tail_md *tail = data; + size_t map_size; + void *vld_offset, *map_offset; + int rc; + + map_size = ftl_num_band_lbks(dev) * sizeof(uint64_t); + vld_offset = (char *)data + ftl_tail_md_hdr_num_lbks() * FTL_BLOCK_SIZE; + map_offset = (char *)vld_offset + ftl_vld_map_num_lbks(dev) * FTL_BLOCK_SIZE; + + rc = ftl_md_hdr_vld(dev, &tail->hdr, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + if (rc) { + return rc; + } + + if (tail->num_lbks != ftl_num_band_lbks(dev)) { + return FTL_MD_INVALID_SIZE; + } + + if (md->vld_map) { + spdk_bit_array_load_mask(md->vld_map, vld_offset); + } + + if (md->lba_map) { + memcpy(md->lba_map, map_offset, map_size); + } + + md->seq = tail->hdr.seq; + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_lba_map(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + memcpy(md->lba_map, data, ftl_num_band_lbks(dev) * sizeof(uint64_t)); + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_head_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_head_md *head = data; + int rc; + + rc = ftl_md_hdr_vld(dev, &head->hdr, sizeof(struct ftl_head_md)); + if (rc) { + return rc; + } + + md->seq = head->hdr.seq; + md->wr_cnt = head->wr_cnt; + + if (dev->global_md.num_lbas == 0) { + dev->global_md.num_lbas = head->lba_cnt; + } + + if (dev->global_md.num_lbas != head->lba_cnt) { + return FTL_MD_INVALID_SIZE; + } + + if (dev->xfer_size != head->xfer_size) { + return FTL_MD_INVALID_SIZE; + } + + return FTL_MD_SUCCESS; +} + +struct ftl_ppa +ftl_band_tail_md_ppa(struct ftl_band *band) +{ + struct ftl_ppa ppa; + struct ftl_chunk *chunk; + struct spdk_ftl_dev *dev = band->dev; + size_t xfer_size = dev->xfer_size; + size_t num_req = ftl_band_tail_md_offset(band) / xfer_size; + size_t i; + + if (spdk_unlikely(!band->num_chunks)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + /* Metadata should be aligned to xfer size */ + assert(ftl_band_tail_md_offset(band) % xfer_size == 0); + + chunk = CIRCLEQ_FIRST(&band->chunks); + for (i = 0; i < num_req % band->num_chunks; ++i) { + chunk = ftl_band_next_chunk(band, chunk); + } + + ppa.lbk = (num_req / band->num_chunks) * xfer_size; + ppa.chk = band->id; + ppa.pu = chunk->punit->start_ppa.pu; + ppa.grp = chunk->punit->start_ppa.grp; + + return ppa; +} + +struct ftl_ppa +ftl_band_head_md_ppa(struct ftl_band *band) +{ + struct ftl_ppa ppa; + + if (spdk_unlikely(!band->num_chunks)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + ppa = CIRCLEQ_FIRST(&band->chunks)->punit->start_ppa; + ppa.chk = band->id; + + return ppa; +} + +void +ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state) +{ + switch (state) { + case FTL_BAND_STATE_FREE: + _ftl_band_set_free(band); + break; + + case FTL_BAND_STATE_OPENING: + _ftl_band_set_opening(band); + break; + + case FTL_BAND_STATE_CLOSED: + _ftl_band_set_closed(band); + break; + + default: + break; + } + + band->state = state; +} + +void +ftl_band_set_addr(struct ftl_band *band, uint64_t lba, struct ftl_ppa ppa) +{ + struct ftl_md *md = &band->md; + uint64_t offset; + + assert(lba != FTL_LBA_INVALID); + + offset = ftl_band_lbkoff_from_ppa(band, ppa); + pthread_spin_lock(&band->md.lock); + + md->num_vld++; + md->lba_map[offset] = lba; + spdk_bit_array_set(md->vld_map, offset); + + pthread_spin_unlock(&band->md.lock); +} + 
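+/*
+ * ftl_band_set_addr() above is the write-side half of the per-band valid
+ * data accounting: it stores the LBA in the band's lba_map, sets the
+ * matching bit in vld_map and bumps num_vld.  The overwrite side is
+ * ftl_invalidate_addr() in ftl_core.c, which clears the bit and decrements
+ * num_vld once an LBA has been written somewhere else; ftl_band_empty()
+ * simply checks num_vld == 0.  Illustrative sequence, using only functions
+ * from this patch and assuming the band is already open:
+ *
+ *	ftl_band_set_addr(band, lba, new_ppa);	// data for lba now lives at new_ppa
+ *	ftl_invalidate_addr(dev, old_ppa);	// previous copy of lba becomes stale
+ */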
+size_t +ftl_band_age(const struct ftl_band *band) +{ + return (size_t)(band->dev->seq - band->md.seq); +} + +size_t +ftl_band_num_usable_lbks(const struct ftl_band *band) +{ + return band->num_chunks * ftl_dev_lbks_in_chunk(band->dev); +} + +size_t +ftl_band_user_lbks(const struct ftl_band *band) +{ + return ftl_band_num_usable_lbks(band) - + ftl_head_md_num_lbks(band->dev) - + ftl_tail_md_num_lbks(band->dev); +} + +struct ftl_band * +ftl_band_from_ppa(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + assert(ppa.chk < ftl_dev_num_bands(dev)); + return &dev->bands[ppa.chk]; +} + +struct ftl_chunk * +ftl_band_chunk_from_ppa(struct ftl_band *band, struct ftl_ppa ppa) +{ + struct spdk_ftl_dev *dev = band->dev; + unsigned int punit; + + punit = ftl_ppa_flatten_punit(dev, ppa); + assert(punit < ftl_dev_num_punits(dev)); + + return &band->chunk_buf[punit]; +} + +uint64_t +ftl_band_lbkoff_from_ppa(struct ftl_band *band, struct ftl_ppa ppa) +{ + struct spdk_ftl_dev *dev = band->dev; + unsigned int punit; + + punit = ftl_ppa_flatten_punit(dev, ppa); + assert(ppa.chk == band->id); + + return punit * ftl_dev_lbks_in_chunk(dev) + ppa.lbk; +} + +struct ftl_ppa +ftl_band_next_xfer_ppa(struct ftl_band *band, struct ftl_ppa ppa, size_t num_lbks) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_chunk *chunk; + unsigned int punit_num; + size_t num_xfers, num_stripes; + + assert(ppa.chk == band->id); + + punit_num = ftl_ppa_flatten_punit(dev, ppa); + chunk = &band->chunk_buf[punit_num]; + + num_lbks += (ppa.lbk % dev->xfer_size); + ppa.lbk -= (ppa.lbk % dev->xfer_size); + +#if defined(DEBUG) + /* Check that the number of chunks has not been changed */ + struct ftl_chunk *_chunk; + size_t _num_chunks = 0; + CIRCLEQ_FOREACH(_chunk, &band->chunks, circleq) { + if (spdk_likely(_chunk->state != FTL_CHUNK_STATE_BAD)) { + _num_chunks++; + } + } + assert(band->num_chunks == _num_chunks); +#endif + num_stripes = (num_lbks / dev->xfer_size) / band->num_chunks; + ppa.lbk += num_stripes * dev->xfer_size; + num_lbks -= num_stripes * dev->xfer_size * band->num_chunks; + + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + num_xfers = num_lbks / dev->xfer_size; + for (size_t i = 0; i < num_xfers; ++i) { + /* When the last chunk is reached the lbk part of the address */ + /* needs to be increased by xfer_size */ + if (ftl_band_chunk_is_last(band, chunk)) { + ppa.lbk += dev->xfer_size; + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + } + + chunk = ftl_band_next_operational_chunk(band, chunk); + ppa.grp = chunk->start_ppa.grp; + ppa.pu = chunk->start_ppa.pu; + + num_lbks -= dev->xfer_size; + } + + if (num_lbks) { + ppa.lbk += num_lbks; + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + } + + return ppa; +} + +struct ftl_ppa +ftl_band_ppa_from_lbkoff(struct ftl_band *band, uint64_t lbkoff) +{ + struct ftl_ppa ppa = { .ppa = 0 }; + struct spdk_ftl_dev *dev = band->dev; + uint64_t punit; + + punit = lbkoff / ftl_dev_lbks_in_chunk(dev) + dev->range.begin; + + ppa.lbk = lbkoff % ftl_dev_lbks_in_chunk(dev); + ppa.chk = band->id; + ppa.pu = punit / dev->geo.num_grp; + ppa.grp = punit % dev->geo.num_grp; + + return ppa; +} + +struct ftl_ppa +ftl_band_next_ppa(struct ftl_band *band, struct ftl_ppa ppa, size_t offset) +{ + uint64_t lbkoff = ftl_band_lbkoff_from_ppa(band, ppa); + return ftl_band_ppa_from_lbkoff(band, lbkoff + offset); +} + +void +ftl_band_acquire_md(struct ftl_band *band) +{ + 
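+	/* The LBA map is reference counted: ftl_band_alloc_md() takes the buffer
+	 * from dev->lba_pool, every user of the map holds a reference, and
+	 * ftl_band_release_md() returns it to the pool once the last reference
+	 * is dropped on a closed (or free) band. */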
assert(band->md.lba_map != NULL); + band->md.ref_cnt++; +} + +int +ftl_band_alloc_md(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + assert(md->ref_cnt == 0); + assert(md->lba_map == NULL); + + md->lba_map = spdk_mempool_get(dev->lba_pool); + if (!md->lba_map) { + return -1; + } + + ftl_band_acquire_md(band); + return 0; +} + +void +ftl_band_release_md(struct ftl_band *band) +{ + struct ftl_md *md = &band->md; + + assert(band->md.lba_map != NULL); + assert(md->ref_cnt > 0); + md->ref_cnt--; + + if (md->ref_cnt == 0) { + ftl_band_free_md(band); + } +} + +static void +ftl_read_md_cb(void *arg, int status) +{ + struct ftl_md_io *md_io = arg; + + if (!status) { + status = md_io->pack_fn(md_io->io.dev, + md_io->md, + md_io->buf); + } else { + status = FTL_MD_IO_FAILURE; + } + + md_io->cb.fn(md_io->cb.ctx, status); +} + +static struct ftl_md_io * +ftl_io_init_md_read(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data, struct ftl_ppa ppa, + struct ftl_band *band, size_t lbk_cnt, size_t req_size, ftl_md_pack_fn fn, + const struct ftl_cb *cb) +{ + struct ftl_md_io *io; + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(*io), + .flags = FTL_IO_MD | FTL_IO_PPA_MODE, + .type = FTL_IO_READ, + .iov_cnt = spdk_divide_round_up(lbk_cnt, req_size), + .req_size = req_size, + .fn = ftl_read_md_cb, + .data = data, + }; + + io = (struct ftl_md_io *)ftl_io_init_internal(&opts); + if (!io) { + return NULL; + } + + io->io.ppa = ppa; + io->md = md; + io->buf = data; + io->pack_fn = fn; + io->cb = *cb; + + return io; +} + +static struct ftl_io * +ftl_io_init_md_write(struct spdk_ftl_dev *dev, struct ftl_band *band, + void *data, size_t req_cnt, spdk_ftl_fn cb) +{ + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(struct ftl_io), + .flags = FTL_IO_MD | FTL_IO_PPA_MODE, + .type = FTL_IO_WRITE, + .iov_cnt = req_cnt, + .req_size = dev->xfer_size, + .fn = cb, + .data = data, + .md = NULL, + }; + + return ftl_io_init_internal(&opts); +} + +static int +ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt, + ftl_md_pack_fn md_fn, spdk_ftl_fn cb) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_io *io; + + io = ftl_io_init_md_write(dev, band, data, + spdk_divide_round_up(lbk_cnt, dev->xfer_size), cb); + if (!io) { + return -ENOMEM; + } + + md_fn(dev, &band->md, data); + + return ftl_io_write(io); +} + +void +ftl_band_md_clear(struct ftl_md *md) +{ + md->seq = 0; + md->num_vld = 0; + md->wr_cnt = 0; + md->lba_map = NULL; +} + +int +ftl_band_write_head_md(struct ftl_band *band, void *data, spdk_ftl_fn cb) +{ + return ftl_band_write_md(band, data, ftl_head_md_num_lbks(band->dev), + ftl_pack_head_md, cb); +} + +int +ftl_band_write_tail_md(struct ftl_band *band, void *data, spdk_ftl_fn cb) +{ + return ftl_band_write_md(band, data, ftl_tail_md_num_lbks(band->dev), + ftl_pack_tail_md, cb); +} + +static struct ftl_ppa +ftl_band_lba_map_ppa(struct ftl_band *band) +{ + return ftl_band_next_xfer_ppa(band, band->tail_md_ppa, + ftl_tail_md_hdr_num_lbks() + + ftl_vld_map_num_lbks(band->dev)); +} + +static int +ftl_band_read_md(struct ftl_band *band, struct ftl_md *md, void *data, size_t lbk_cnt, + size_t req_size, struct ftl_ppa start_ppa, ftl_md_pack_fn unpack_fn, + const struct ftl_cb *cb) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md_io *io; + + if (spdk_unlikely(!band->num_chunks)) { + return -ENOENT; + } + + io 
= ftl_io_init_md_read(dev, md, data, start_ppa, band, lbk_cnt, + req_size, unpack_fn, cb); + if (!io) { + return -ENOMEM; + } + + return ftl_io_read((struct ftl_io *)io); +} + +int +ftl_band_read_tail_md(struct ftl_band *band, struct ftl_md *md, + void *data, struct ftl_ppa ppa, const struct ftl_cb *cb) +{ + return ftl_band_read_md(band, md, data, + ftl_tail_md_num_lbks(band->dev), + band->dev->xfer_size, + ppa, + ftl_unpack_tail_md, + cb); +} + +int +ftl_band_read_lba_map(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb) +{ + /* TODO: change this interface to allow reading parts of the LBA map instead of */ + /* reading whole metadata */ + return ftl_band_read_md(band, md, data, + ftl_lba_map_num_lbks(band->dev), + band->dev->xfer_size, + ftl_band_lba_map_ppa(band), + ftl_unpack_lba_map, + cb); +} + +int +ftl_band_read_head_md(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb) +{ + return ftl_band_read_md(band, md, data, + ftl_head_md_num_lbks(band->dev), + band->dev->xfer_size, + ftl_band_head_md_ppa(band), + ftl_unpack_head_md, + cb); +} + +static void +ftl_band_remove_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + CIRCLEQ_REMOVE(&band->chunks, chunk, circleq); + band->num_chunks--; +} + +static void +ftl_erase_fail(struct ftl_io *io, int status) +{ + struct ftl_chunk *chunk; + char buf[128]; + + SPDK_ERRLOG("Erase failed @ppa: %s, status: %d\n", + ftl_ppa2str(io->ppa, buf, sizeof(buf)), status); + + chunk = ftl_band_chunk_from_ppa(io->band, io->ppa); + chunk->state = FTL_CHUNK_STATE_BAD; + ftl_band_remove_chunk(io->band, chunk); +} + +static void +ftl_band_erase_cb(void *ctx, int status) +{ + struct ftl_io *io = ctx; + struct ftl_chunk *chunk; + + if (spdk_unlikely(status)) { + ftl_erase_fail(io, status); + return; + } + chunk = ftl_band_chunk_from_ppa(io->band, io->ppa); + chunk->state = FTL_CHUNK_STATE_FREE; +} + +int +ftl_band_erase(struct ftl_band *band) +{ + struct ftl_chunk *chunk; + struct ftl_io *io; + int rc = 0; + + assert(band->state == FTL_BAND_STATE_CLOSED || + band->state == FTL_BAND_STATE_FREE); + + ftl_band_set_state(band, FTL_BAND_STATE_PREP); + + CIRCLEQ_FOREACH(chunk, &band->chunks, circleq) { + if (chunk->state == FTL_CHUNK_STATE_FREE) { + continue; + } + + io = ftl_io_erase_init(band, 1, ftl_band_erase_cb); + if (!io) { + rc = -ENOMEM; + break; + } + + io->ppa = chunk->start_ppa; + rc = ftl_io_erase(io); + if (rc) { + assert(0); + /* TODO: change band's state back to close? */ + break; + } + } + + return rc; +} + +int +ftl_band_write_prep(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + + if (ftl_band_alloc_md(band)) { + return -1; + } + + band->md.seq = ++dev->seq; + return 0; +} + +struct ftl_chunk * +ftl_band_next_operational_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + struct ftl_chunk *result = NULL; + struct ftl_chunk *entry; + + if (spdk_unlikely(!band->num_chunks)) { + return NULL; + } + + /* Erasing band may fail after it was assigned to wptr. */ + /* In such a case chunk is no longer in band->chunks queue. 
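Fall back to the nearest operational chunk that follows it in position, wrapping around to the first chunk in the band if there is none.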
*/ + if (spdk_likely(chunk->state != FTL_CHUNK_STATE_BAD)) { + result = ftl_band_next_chunk(band, chunk); + } else { + CIRCLEQ_FOREACH_REVERSE(entry, &band->chunks, circleq) { + if (entry->pos > chunk->pos) { + result = entry; + } else { + if (!result) { + result = CIRCLEQ_FIRST(&band->chunks); + } + break; + } + } + } + + return result; +} diff --git a/lib/ftl/ftl_band.h b/lib/ftl/ftl_band.h new file mode 100644 index 000000000..4a9788511 --- /dev/null +++ b/lib/ftl/ftl_band.h @@ -0,0 +1,253 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef FTL_BAND_H +#define FTL_BAND_H + +#include "spdk/stdinc.h" +#include "spdk/bit_array.h" +#include "spdk/queue.h" + +#include "ftl_ppa.h" + +struct spdk_ftl_dev; +struct ftl_cb; + +enum ftl_chunk_state { + FTL_CHUNK_STATE_FREE, + FTL_CHUNK_STATE_OPEN, + FTL_CHUNK_STATE_CLOSED, + FTL_CHUNK_STATE_BAD, + FTL_CHUNK_STATE_VACANT, +}; + +struct ftl_chunk { + /* Block state */ + enum ftl_chunk_state state; + + /* First PPA */ + struct ftl_ppa start_ppa; + + /* Pointer to parallel unit */ + struct ftl_punit *punit; + + /* Position in band's chunk_buf */ + uint32_t pos; + + CIRCLEQ_ENTRY(ftl_chunk) circleq; +}; + +enum ftl_md_status { + FTL_MD_SUCCESS, + /* Metadata read failure */ + FTL_MD_IO_FAILURE, + /* Invalid version */ + FTL_MD_INVALID_VER, + /* UUID doesn't match */ + FTL_MD_NO_MD, + /* UUID and version matches but CRC doesn't */ + FTL_MD_INVALID_CRC, + /* Vld or lba map size doesn't match */ + FTL_MD_INVALID_SIZE +}; + +struct ftl_md { + /* Sequence number */ + uint64_t seq; + + /* Number of defrag cycles */ + uint64_t wr_cnt; + + /* LBA/vld map lock */ + pthread_spinlock_t lock; + + /* Number of valid LBAs */ + size_t num_vld; + + /* LBA map's reference count */ + size_t ref_cnt; + + /* Bitmap of valid LBAs */ + struct spdk_bit_array *vld_map; + + /* LBA map (only valid for open bands) */ + uint64_t *lba_map; +}; + +enum ftl_band_state { + FTL_BAND_STATE_FREE, + FTL_BAND_STATE_PREP, + FTL_BAND_STATE_OPENING, + FTL_BAND_STATE_OPEN, + FTL_BAND_STATE_FULL, + FTL_BAND_STATE_CLOSING, + FTL_BAND_STATE_CLOSED, + FTL_BAND_STATE_MAX +}; + +struct ftl_band { + /* Device this band belongs to */ + struct spdk_ftl_dev *dev; + + /* Number of operational chunks */ + size_t num_chunks; + + /* Array of chunks */ + struct ftl_chunk *chunk_buf; + + /* List of operational chunks */ + CIRCLEQ_HEAD(, ftl_chunk) chunks; + + /* Band's metadata */ + struct ftl_md md; + + /* Band's state */ + enum ftl_band_state state; + + /* Band's index */ + unsigned int id; + + /* Latest merit calculation */ + double merit; + + /* High defrag priority - means that the metadata should be copied and */ + /* the band should be defragged immediately */ + int high_prio; + + /* End metadata start ppa */ + struct ftl_ppa tail_md_ppa; + + /* Free/shut bands' lists */ + LIST_ENTRY(ftl_band) list_entry; + + /* High priority queue link */ + STAILQ_ENTRY(ftl_band) prio_stailq; +}; + +uint64_t ftl_band_lbkoff_from_ppa(struct ftl_band *band, struct ftl_ppa ppa); +struct ftl_ppa ftl_band_ppa_from_lbkoff(struct ftl_band *band, uint64_t lbkoff); +void ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state); +size_t ftl_band_age(const struct ftl_band *band); +void ftl_band_acquire_md(struct ftl_band *band); +int ftl_band_alloc_md(struct ftl_band *band); +void ftl_band_release_md(struct ftl_band *band); +struct ftl_ppa ftl_band_next_xfer_ppa(struct ftl_band *band, struct ftl_ppa ppa, + size_t num_lbks); +struct ftl_ppa ftl_band_next_ppa(struct ftl_band *band, struct ftl_ppa ppa, + size_t offset); +size_t ftl_band_num_usable_lbks(const struct ftl_band *band); +size_t ftl_band_user_lbks(const struct ftl_band *band); +void ftl_band_set_addr(struct ftl_band *band, uint64_t lba, + struct ftl_ppa ppa); +struct ftl_band *ftl_band_from_ppa(struct spdk_ftl_dev *dev, struct ftl_ppa ppa); +struct ftl_chunk *ftl_band_chunk_from_ppa(struct ftl_band *band, struct ftl_ppa); +void ftl_band_md_clear(struct ftl_md *md); +int ftl_band_read_tail_md(struct ftl_band *band, struct ftl_md *md, + void *data, struct ftl_ppa, + const struct 
ftl_cb *cb); +int ftl_band_read_head_md(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb); +int ftl_band_read_lba_map(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb); +int ftl_band_write_tail_md(struct ftl_band *band, void *data, spdk_ftl_fn cb); +int ftl_band_write_head_md(struct ftl_band *band, void *data, spdk_ftl_fn cb); +struct ftl_ppa ftl_band_tail_md_ppa(struct ftl_band *band); +struct ftl_ppa ftl_band_head_md_ppa(struct ftl_band *band); +void ftl_band_write_failed(struct ftl_band *band); +void ftl_band_clear_md(struct ftl_band *band); +int ftl_band_full(struct ftl_band *band, size_t offset); +int ftl_band_erase(struct ftl_band *band); +int ftl_band_write_prep(struct ftl_band *band); +struct ftl_chunk *ftl_band_next_operational_chunk(struct ftl_band *band, + struct ftl_chunk *chunk); + +static inline int +ftl_band_empty(const struct ftl_band *band) +{ + return band->md.num_vld == 0; +} + +static inline struct ftl_chunk * +ftl_band_next_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + assert(chunk->state != FTL_CHUNK_STATE_BAD); + return CIRCLEQ_LOOP_NEXT(&band->chunks, chunk, circleq); +} + +static inline void +ftl_band_set_next_state(struct ftl_band *band) +{ + ftl_band_set_state(band, (band->state + 1) % FTL_BAND_STATE_MAX); +} + +static inline int +ftl_band_state_changing(struct ftl_band *band) +{ + return band->state == FTL_BAND_STATE_OPENING || + band->state == FTL_BAND_STATE_CLOSING; +} + +static inline int +ftl_band_lbkoff_valid(struct ftl_band *band, size_t lbkoff) +{ + struct ftl_md *md = &band->md; + + pthread_spin_lock(&md->lock); + if (spdk_bit_array_get(md->vld_map, lbkoff)) { + pthread_spin_unlock(&md->lock); + return 1; + } + + pthread_spin_unlock(&md->lock); + return 0; +} + +static inline int +ftl_band_chunk_is_last(struct ftl_band *band, struct ftl_chunk *chunk) +{ + return chunk == CIRCLEQ_LAST(&band->chunks); +} + +static inline int +ftl_band_chunk_is_first(struct ftl_band *band, struct ftl_chunk *chunk) +{ + return chunk == CIRCLEQ_FIRST(&band->chunks); +} + +static inline int +ftl_chunk_is_writable(const struct ftl_chunk *chunk) +{ + return chunk->state == FTL_CHUNK_STATE_OPEN || chunk->state == FTL_CHUNK_STATE_FREE; +} + +#endif /* FTL_BAND_H */ diff --git a/lib/ftl/ftl_core.c b/lib/ftl/ftl_core.c new file mode 100644 index 000000000..73bf1e0ff --- /dev/null +++ b/lib/ftl/ftl_core.c @@ -0,0 +1,1470 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/likely.h" +#include "spdk/stdinc.h" +#include "spdk/nvme.h" +#include "spdk/io_channel.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk/ftl.h" + +#include "ftl_core.h" +#include "ftl_band.h" +#include "ftl_io.h" +#include "ftl_rwb.h" +#include "ftl_debug.h" + +/* Max number of iovecs */ +#define FTL_MAX_IOV 1024 + +struct ftl_wptr { + /* Owner device */ + struct spdk_ftl_dev *dev; + + /* Current PPA */ + struct ftl_ppa ppa; + + /* Band currently being written to */ + struct ftl_band *band; + + /* Current logical block's offset */ + uint64_t offset; + + /* Current erase block */ + struct ftl_chunk *chunk; + + /* Metadata DMA buffer */ + void *md_buf; + + /* List link */ + LIST_ENTRY(ftl_wptr) list_entry; +}; + +struct ftl_flush { + /* Owner device */ + struct spdk_ftl_dev *dev; + + /* Number of batches to wait for */ + size_t num_req; + + /* Callback */ + struct ftl_cb cb; + + /* Batch bitmap */ + struct spdk_bit_array *bmap; + + /* List link */ + LIST_ENTRY(ftl_flush) list_entry; +}; + +typedef int (*ftl_next_ppa_fn)(struct ftl_io *, struct ftl_ppa *, size_t, void *); +static void _ftl_read(void *); +static void _ftl_write(void *); + +static int +ftl_rwb_flags_from_io(const struct ftl_io *io) +{ + int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD; + return io->flags & valid_flags; +} + +static int +ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry) +{ + return entry->flags & FTL_IO_WEAK; +} + +static void +ftl_wptr_free(struct ftl_wptr *wptr) +{ + if (!wptr) { + return; + } + + spdk_dma_free(wptr->md_buf); + free(wptr); +} + +static void +ftl_remove_wptr(struct ftl_wptr *wptr) +{ + LIST_REMOVE(wptr, list_entry); + ftl_wptr_free(wptr); +} + +static void +ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status) +{ + struct ftl_io *io = arg; + + if (spdk_nvme_cpl_is_error(status)) { + ftl_io_process_error(io, status); + } + + ftl_trace(completion, ftl_dev_trace(io->dev), io, FTL_TRACE_COMPLETION_DISK); + + if (!ftl_io_dec_req(io)) { + ftl_io_complete(io); + } +} + +static void +ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band) +{ + struct ftl_wptr *wptr = NULL; + + LIST_FOREACH(wptr, &dev->wptr_list, list_entry) { + if (wptr->band == band) { + break; + } + } + + /* If the band already has the high_prio flag set, other writes must */ + /* have failed earlier, so it's already taken care of. 
*/ + if (band->high_prio) { + assert(wptr == NULL); + return; + } + + ftl_band_write_failed(band); + ftl_remove_wptr(wptr); +} + +static struct ftl_wptr * +ftl_wptr_from_band(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_wptr *wptr = NULL; + + LIST_FOREACH(wptr, &dev->wptr_list, list_entry) { + if (wptr->band == band) { + return wptr; + } + } + + return NULL; +} + +static void +ftl_md_write_fail(struct ftl_io *io, int status) +{ + struct ftl_band *band = io->band; + struct ftl_wptr *wptr; + char buf[128]; + + wptr = ftl_wptr_from_band(band); + + SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n", + ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status); + + ftl_halt_writes(io->dev, band); +} + +static void +ftl_md_write_cb(void *arg, int status) +{ + struct ftl_io *io = arg; + struct ftl_wptr *wptr; + + wptr = ftl_wptr_from_band(io->band); + + if (status) { + ftl_md_write_fail(io, status); + return; + } + + ftl_band_set_next_state(io->band); + if (io->band->state == FTL_BAND_STATE_CLOSED) { + ftl_remove_wptr(wptr); + } +} + +static int +ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa, + size_t lbk, void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + size_t lbk_cnt, max_lbks; + + assert(ftl_io_mode_ppa(io)); + assert(io->iov_pos < io->iov_cnt); + + if (lbk == 0) { + *ppa = io->ppa; + } else { + *ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, lbk); + } + + assert(!ftl_ppa_invalid(*ppa)); + + /* Metadata has to be read in the way it's written (jumping across */ + /* the chunks in xfer_size increments) */ + if (io->flags & FTL_IO_MD) { + max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size); + lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks); + assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size); + } else { + lbk_cnt = ftl_io_iovec_len_left(io); + } + + return lbk_cnt; +} + +static int +ftl_wptr_close_band(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + ftl_band_set_state(band, FTL_BAND_STATE_CLOSING); + band->tail_md_ppa = wptr->ppa; + + return ftl_band_write_tail_md(band, wptr->md_buf, ftl_md_write_cb); +} + +static int +ftl_wptr_open_band(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + assert(ftl_band_chunk_is_first(band, wptr->chunk)); + assert(band->md.num_vld == 0); + + ftl_band_clear_md(band); + + assert(band->state == FTL_BAND_STATE_PREP); + ftl_band_set_state(band, FTL_BAND_STATE_OPENING); + + return ftl_band_write_head_md(band, wptr->md_buf, ftl_md_write_cb); +} + +static int +ftl_submit_erase(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_band *band = io->band; + struct ftl_ppa ppa = io->ppa; + struct ftl_chunk *chunk; + uint64_t ppa_packed; + int rc = 0; + size_t i; + + for (i = 0; i < io->lbk_cnt; ++i) { + if (i != 0) { + chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa)); + assert(chunk->state == FTL_CHUNK_STATE_CLOSED || + chunk->state == FTL_CHUNK_STATE_VACANT); + ppa = chunk->start_ppa; + } + + assert(ppa.lbk == 0); + ppa_packed = ftl_ppa_addr_pack(dev, ppa); + + ftl_io_inc_req(io); + + ftl_trace(submission, ftl_dev_trace(dev), io, ppa, 1); + rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev), + &ppa_packed, 1, NULL, ftl_io_cmpl_cb, io); + if (rc) { + SPDK_ERRLOG("Vector reset failed with status: %d\n", rc); + ftl_io_dec_req(io); + break; + } + + } + + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static void +_ftl_io_erase(void *ctx) +{ + 
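+	/* Message handler: runs on the core thread, so the erase is resubmitted
+	 * from the proper context (see ftl_io_erase() below). */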
ftl_io_erase((struct ftl_io *)ctx); +} + +static bool +ftl_check_core_thread(const struct spdk_ftl_dev *dev) +{ + return dev->core_thread.tid == pthread_self(); +} + +static bool +ftl_check_read_thread(const struct spdk_ftl_dev *dev) +{ + return dev->read_thread.tid == pthread_self(); +} + +int +ftl_io_erase(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + + if (ftl_check_core_thread(dev)) { + return ftl_submit_erase(io); + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io); + return 0; +} + +static struct ftl_band * +ftl_next_write_band(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + + band = LIST_FIRST(&dev->free_bands); + if (!band) { + return NULL; + } + assert(band->state == FTL_BAND_STATE_FREE); + + if (ftl_band_erase(band)) { + /* TODO: handle erase failure */ + return NULL; + } + + return band; +} + +static struct ftl_band * +ftl_next_wptr_band(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + + if (!dev->next_band) { + band = ftl_next_write_band(dev); + } else { + assert(dev->next_band->state == FTL_BAND_STATE_PREP); + band = dev->next_band; + dev->next_band = NULL; + } + + return band; +} + +static struct ftl_wptr * +ftl_wptr_init(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_wptr *wptr; + + wptr = calloc(1, sizeof(*wptr)); + if (!wptr) { + return NULL; + } + + wptr->md_buf = spdk_dma_zmalloc(ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE, + FTL_BLOCK_SIZE, NULL); + if (!wptr->md_buf) { + ftl_wptr_free(wptr); + return NULL; + } + + wptr->dev = dev; + wptr->band = band; + wptr->chunk = CIRCLEQ_FIRST(&band->chunks); + wptr->ppa = wptr->chunk->start_ppa; + + return wptr; +} + +static int +ftl_add_wptr(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + struct ftl_wptr *wptr; + + band = ftl_next_wptr_band(dev); + if (!band) { + return -1; + } + + wptr = ftl_wptr_init(band); + if (!wptr) { + return -1; + } + + if (ftl_band_write_prep(band)) { + ftl_wptr_free(wptr); + return -1; + } + + LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry); + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id); + ftl_trace(write_band, ftl_dev_trace(dev), band); + return 0; +} + +static void +ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size) +{ + struct ftl_band *band = wptr->band; + struct spdk_ftl_dev *dev = wptr->dev; + struct spdk_ftl_conf *conf = &dev->conf; + size_t next_thld; + + wptr->offset += xfer_size; + next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100; + + if (ftl_band_full(band, wptr->offset)) { + ftl_band_set_state(band, FTL_BAND_STATE_FULL); + } + + wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size); + wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk); + + assert(!ftl_ppa_invalid(wptr->ppa)); + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n", + wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk); + + if (wptr->offset >= next_thld && !dev->next_band) { + dev->next_band = ftl_next_write_band(dev); + } +} + +static int +ftl_wptr_ready(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + /* TODO: add handling of empty bands */ + + if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) { + /* Erasing band may fail after it was assigned to wptr. */ + if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) { + ftl_wptr_advance(wptr, wptr->dev->xfer_size); + } + return 0; + } + + /* If we're in the process of writing metadata, wait till it is */ + /* completed. 
*/ + /* TODO: we should probably change bands once we're writing tail md */ + if (ftl_band_state_changing(band)) { + return 0; + } + + if (band->state == FTL_BAND_STATE_FULL) { + if (ftl_wptr_close_band(wptr)) { + /* TODO: need recovery here */ + assert(false); + } + return 0; + } + + if (band->state != FTL_BAND_STATE_OPEN) { + if (ftl_wptr_open_band(wptr)) { + /* TODO: need recovery here */ + assert(false); + } + return 0; + } + + return 1; +} + +static const struct spdk_ftl_limit * +ftl_get_limit(const struct spdk_ftl_dev *dev, int type) +{ + assert(type < SPDK_FTL_LIMIT_MAX); + return &dev->conf.defrag.limits[type]; +} + +static int +ftl_update_md_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry) +{ + struct ftl_ppa ppa; + + /* If the LBA is invalid don't bother checking the md and l2p */ + if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) { + return 1; + } + + ppa = ftl_l2p_get(dev, entry->lba); + if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) { + ftl_invalidate_addr(dev, entry->ppa); + return 1; + } + + return 0; +} + +static void +ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry) +{ + pthread_spin_lock(&entry->lock); + + if (!ftl_rwb_entry_valid(entry)) { + goto unlock; + } + + /* Make sure the metadata is in sync with l2p. If the l2p still contains */ + /* the entry, fill it with the on-disk PPA and clear the cache status */ + /* bit. Otherwise, skip the l2p update and just clear the cache status. */ + /* This can happen, when a write comes during the time that l2p contains */ + /* the entry, but the entry doesn't have a PPA assigned (and therefore */ + /* does not have the cache bit set). */ + if (ftl_update_md_entry(dev, entry)) { + goto clear; + } + + ftl_l2p_set(dev, entry->lba, entry->ppa); +clear: + ftl_rwb_entry_invalidate(entry); +unlock: + pthread_spin_unlock(&entry->lock); +} + +static struct ftl_rwb_entry * +ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags) +{ + struct ftl_rwb_entry *entry; + + entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags)); + if (!entry) { + return NULL; + } + + ftl_evict_cache_entry(dev, entry); + + entry->flags = flags; + return entry; +} + +static void +ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size) +{ + struct ftl_rwb_entry *entry; + int flags = FTL_IO_PAD | FTL_IO_INTERNAL; + + for (size_t i = 0; i < size; ++i) { + entry = ftl_acquire_entry(dev, flags); + if (!entry) { + break; + } + + entry->lba = FTL_LBA_INVALID; + entry->ppa = ftl_to_ppa(FTL_PPA_INVALID); + memset(entry->data, 0, FTL_BLOCK_SIZE); + ftl_rwb_push(entry); + } +} + +static void +ftl_remove_free_bands(struct spdk_ftl_dev *dev) +{ + while (!LIST_EMPTY(&dev->free_bands)) { + LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry); + } + + dev->next_band = NULL; +} + +static void +ftl_process_shutdown(struct spdk_ftl_dev *dev) +{ + size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) + + ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER); + + if (size >= dev->xfer_size) { + return; + } + + /* If we reach this point we need to remove free bands */ + /* and pad current wptr band to the end */ + ftl_remove_free_bands(dev); + + /* Pad write buffer until band is full */ + ftl_rwb_pad(dev, dev->xfer_size - size); +} + +static int +ftl_shutdown_complete(struct spdk_ftl_dev *dev) +{ + return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) && + LIST_EMPTY(&dev->wptr_list); +} + +void +ftl_apply_limits(struct spdk_ftl_dev *dev) +{ + const struct spdk_ftl_limit *limit; + struct ftl_stats *stats = 
&dev->stats; + size_t rwb_limit[FTL_RWB_TYPE_MAX]; + int i; + + ftl_rwb_get_limits(dev->rwb, rwb_limit); + + /* Clear existing limit */ + dev->limit = SPDK_FTL_LIMIT_MAX; + + for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) { + limit = ftl_get_limit(dev, i); + + if (dev->num_free <= limit->thld) { + rwb_limit[FTL_RWB_TYPE_USER] = + (limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100; + stats->limits[i]++; + dev->limit = i; + goto apply; + } + } + + /* Clear the limits, since we don't need to apply them anymore */ + rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb); +apply: + ftl_trace(limits, ftl_dev_trace(dev), rwb_limit, dev->num_free); + ftl_rwb_set_limits(dev->rwb, rwb_limit); +} + +static int +ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + struct ftl_band *band = ftl_band_from_ppa(dev, ppa); + struct ftl_md *md = &band->md; + uint64_t offset; + + offset = ftl_band_lbkoff_from_ppa(band, ppa); + + /* The bit might be already cleared if two writes are scheduled to the */ + /* same LBA at the same time */ + if (spdk_bit_array_get(md->vld_map, offset)) { + assert(md->num_vld > 0); + spdk_bit_array_clear(md->vld_map, offset); + md->num_vld--; + return 1; + } + + return 0; +} + +int +ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + struct ftl_band *band; + int rc; + + assert(!ftl_ppa_cached(ppa)); + band = ftl_band_from_ppa(dev, ppa); + + pthread_spin_lock(&band->md.lock); + rc = ftl_invalidate_addr_unlocked(dev, ppa); + pthread_spin_unlock(&band->md.lock); + + return rc; +} + +static int +ftl_read_retry(int rc) +{ + return rc == -EAGAIN; +} + +static int +ftl_read_canceled(int rc) +{ + return rc == 0; +} + +static int +ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa, + void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_ppa ppa; + size_t lbk = 0; + int rc = 0, lbk_cnt; + + while (lbk < io->lbk_cnt) { + /* We might hit the cache here, if so, skip the read */ + lbk_cnt = rc = next_ppa(io, &ppa, lbk, ctx); + + /* We might need to retry the read from scratch (e.g. 
*/ + /* because write was under way and completed before */ + /* we could read it from rwb */ + if (ftl_read_retry(rc)) { + continue; + } + + /* We don't have to schedule the read, as it was read from cache */ + if (ftl_read_canceled(rc)) { + ftl_io_update_iovec(io, 1); + lbk++; + continue; + } + + assert(lbk_cnt > 0); + + ftl_trace(submission, ftl_dev_trace(dev), io, ppa, lbk_cnt); + rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev), + ftl_io_iovec_addr(io), + ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt, + ftl_io_cmpl_cb, io, 0); + if (rc) { + SPDK_ERRLOG("spdk_nvme_ns_cmd_read failed with status: %d\n", rc); + io->status = -EIO; + break; + } + + ftl_io_update_iovec(io, lbk_cnt); + ftl_io_inc_req(io); + lbk += lbk_cnt; + } + + /* If we didn't have to read anything from the device, */ + /* complete the request right away */ + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static int +ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba, + struct ftl_ppa ppa, void *buf) +{ + struct ftl_rwb *rwb = io->dev->rwb; + struct ftl_rwb_entry *entry; + struct ftl_ppa nppa; + int rc = 0; + + entry = ftl_rwb_entry_from_offset(rwb, ppa.offset); + pthread_spin_lock(&entry->lock); + + nppa = ftl_l2p_get(io->dev, lba); + if (ppa.ppa != nppa.ppa) { + rc = -1; + goto out; + } + + memcpy(buf, entry->data, FTL_BLOCK_SIZE); +out: + pthread_spin_unlock(&entry->lock); + return rc; +} + +static int +ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa, + size_t lbk, void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + *ppa = ftl_l2p_get(dev, io->lba + lbk); + + (void) ctx; + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n", ppa->ppa, io->lba); + + /* If the PPA is invalid, skip it (the buffer should already be zero'ed) */ + if (ftl_ppa_invalid(*ppa)) { + ftl_trace(completion, ftl_dev_trace(io->dev), io, + FTL_TRACE_COMPLETION_INVALID); + return 0; + } + + if (ftl_ppa_cached(*ppa)) { + if (!ftl_ppa_cache_read(io, io->lba + lbk, *ppa, ftl_io_iovec_addr(io))) { + ftl_trace(completion, ftl_dev_trace(io->dev), io, + FTL_TRACE_COMPLETION_CACHE); + return 0; + } + + /* If the state changed, we have to re-read the l2p */ + return -EAGAIN; + } + + /* We want to read one lbk at a time */ + return 1; +} + +static void +ftl_complete_flush(struct ftl_flush *flush) +{ + assert(flush->num_req == 0); + LIST_REMOVE(flush, list_entry); + + flush->cb.fn(flush->cb.ctx, 0); + + spdk_bit_array_free(&flush->bmap); + free(flush); +} + +static void +ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch) +{ + struct ftl_flush *flush, *tflush; + size_t offset; + + LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) { + offset = ftl_rwb_batch_get_offset(batch); + + if (spdk_bit_array_get(flush->bmap, offset)) { + spdk_bit_array_set(flush->bmap, offset); + if (!(--flush->num_req)) { + ftl_complete_flush(flush); + } + } + } +} + +static void +ftl_write_fail(struct ftl_io *io, int status) +{ + struct ftl_rwb_batch *batch = io->rwb_batch; + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_entry *entry; + struct ftl_band *band; + char buf[128]; + + entry = ftl_rwb_batch_first_entry(batch); + + band = ftl_band_from_ppa(io->dev, entry->ppa); + SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n", + ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status); + + /* Close the band and, halt wptr and defrag */ + ftl_halt_writes(dev, band); + + ftl_rwb_foreach(entry, batch) { + /* Invalidate meta set by process_writes() */ + ftl_invalidate_addr(dev, entry->ppa); + } + + /* Reset 
the batch back to the the RWB to resend it later */ + ftl_rwb_batch_revert(batch); +} + +static void +ftl_write_cb(void *arg, int status) +{ + struct ftl_io *io = arg; + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_batch *batch = io->rwb_batch; + struct ftl_rwb_entry *entry; + + if (status) { + ftl_write_fail(io, status); + return; + } + + assert(io->lbk_cnt == dev->xfer_size); + ftl_rwb_foreach(entry, batch) { + if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) { + /* Verify that the LBA is set for user lbks */ + assert(entry->lba != FTL_LBA_INVALID); + } + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n", + entry->ppa.ppa, entry->lba); + + if (ftl_update_md_entry(dev, entry)) { + ftl_rwb_entry_invalidate(entry); + } + } + + ftl_process_flush(dev, batch); + ftl_rwb_batch_release(batch); +} + +static void +ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry) +{ + if (!ftl_rwb_entry_internal(entry)) { + dev->stats.write_user++; + } + dev->stats.write_total++; +} + +static void +ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry, + struct ftl_ppa ppa) +{ + struct ftl_ppa prev_ppa; + struct ftl_rwb_entry *prev; + struct ftl_band *band; + int valid; + + prev_ppa = ftl_l2p_get(dev, entry->lba); + if (ftl_ppa_invalid(prev_ppa)) { + ftl_l2p_set(dev, entry->lba, ppa); + return; + } + + /* If the L2P's PPA is different than what we expected we don't need to */ + /* do anything (someone's already overwritten our data). */ + if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) { + return; + } + + if (ftl_ppa_cached(prev_ppa)) { + assert(!ftl_rwb_entry_weak(entry)); + prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset); + pthread_spin_lock(&prev->lock); + + /* Re-read the L2P under the lock to protect against updates */ + /* to this LBA from other threads */ + prev_ppa = ftl_l2p_get(dev, entry->lba); + + /* If the entry is no longer in cache, another write has been */ + /* scheduled in the meantime, so we have to invalidate its LBA */ + if (!ftl_ppa_cached(prev_ppa)) { + ftl_invalidate_addr(dev, prev_ppa); + } + + /* If previous entry is part of cache, remove and invalidate it */ + if (ftl_rwb_entry_valid(prev)) { + ftl_invalidate_addr(dev, prev->ppa); + ftl_rwb_entry_invalidate(prev); + } + + ftl_l2p_set(dev, entry->lba, ppa); + pthread_spin_unlock(&prev->lock); + return; + } + + /* Lock the band containing previous PPA. This assures atomic changes to */ + /* the L2P as wall as metadata. The valid bits in metadata are used to */ + /* check weak writes validity. */ + band = ftl_band_from_ppa(dev, prev_ppa); + pthread_spin_lock(&band->md.lock); + + valid = ftl_invalidate_addr_unlocked(dev, prev_ppa); + + /* If the address has been invalidated already, we don't want to update */ + /* the L2P for weak writes, as it means the write is no longer valid. 
*/ + if (!ftl_rwb_entry_weak(entry) || valid) { + ftl_l2p_set(dev, entry->lba, ppa); + } + + pthread_spin_unlock(&band->md.lock); +} + +static int +ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct iovec *iov = ftl_io_iovec(io); + int rc = 0; + size_t i; + + for (i = 0; i < io->iov_cnt; ++i) { + assert(iov[i].iov_len > 0); + assert(iov[i].iov_len / PAGE_SIZE == dev->xfer_size); + + ftl_trace(submission, ftl_dev_trace(dev), io, wptr->ppa, + iov[i].iov_len / PAGE_SIZE); + rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev), + iov[i].iov_base, ftl_io_get_md(io), + ftl_ppa_addr_pack(dev, wptr->ppa), + iov[i].iov_len / PAGE_SIZE, + ftl_io_cmpl_cb, io, 0, 0, 0); + if (rc) { + SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n", + rc, wptr->ppa.ppa); + io->status = -EIO; + break; + } + + io->pos = iov[i].iov_len / PAGE_SIZE; + ftl_io_inc_req(io); + ftl_wptr_advance(wptr, iov[i].iov_len / PAGE_SIZE); + } + + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static void +ftl_flush_pad_batch(struct spdk_ftl_dev *dev) +{ + struct ftl_rwb *rwb = dev->rwb; + size_t size; + + size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) + + ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER); + + /* There must be something in the RWB, otherwise the flush */ + /* wouldn't be waiting for anything */ + assert(size > 0); + + /* Only add padding when there's less than xfer size */ + /* entries in the buffer. Otherwise we just have to wait */ + /* for the entries to become ready. */ + if (size < dev->xfer_size) { + ftl_rwb_pad(dev, dev->xfer_size - (size % dev->xfer_size)); + } +} + +static int +ftl_wptr_process_writes(struct ftl_wptr *wptr) +{ + struct spdk_ftl_dev *dev = wptr->dev; + struct ftl_rwb_batch *batch; + struct ftl_rwb_entry *entry; + struct ftl_io *io; + struct ftl_ppa ppa; + + /* Make sure the band is prepared for writing */ + if (!ftl_wptr_ready(wptr)) { + return 0; + } + + if (dev->halt) { + ftl_process_shutdown(dev); + } + + batch = ftl_rwb_pop(dev->rwb); + if (!batch) { + /* If there are queued flush requests we need to pad the RWB to */ + /* force out remaining entries */ + if (!LIST_EMPTY(&dev->flush_list)) { + ftl_flush_pad_batch(dev); + } + + return 0; + } + + io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb); + if (!io) { + goto error; + } + + ppa = wptr->ppa; + ftl_rwb_foreach(entry, batch) { + entry->ppa = ppa; + /* Setting entry's cache bit needs to be done after metadata */ + /* within the band is updated to make sure that writes */ + /* invalidating the entry clear the metadata as well */ + if (entry->lba != FTL_LBA_INVALID) { + ftl_band_set_addr(wptr->band, entry->lba, entry->ppa); + } + + ftl_rwb_entry_set_valid(entry); + + ftl_trace(rwb_pop, ftl_dev_trace(dev), entry); + ftl_update_rwb_stats(dev, entry); + + ppa = ftl_band_next_ppa(wptr->band, ppa, 1); + } + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa, + ftl_ppa_addr_pack(dev, wptr->ppa)); + + if (ftl_submit_write(wptr, io)) { + /* TODO: we need some recovery here */ + assert(0 && "Write submit failed"); + if (ftl_io_done(io)) { + ftl_io_free(io); + } + } + + return dev->xfer_size; +error: + ftl_rwb_batch_revert(batch); + return 0; +} + +static int +ftl_process_writes(struct spdk_ftl_dev *dev) +{ + struct ftl_wptr *wptr, *twptr; + size_t num_active = 0; + enum ftl_band_state state; + + LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) { + ftl_wptr_process_writes(wptr); + state = 
wptr->band->state; + + if (state != FTL_BAND_STATE_FULL && + state != FTL_BAND_STATE_CLOSING && + state != FTL_BAND_STATE_CLOSED) { + num_active++; + } + } + + if (num_active < 1) { + ftl_add_wptr(dev); + } + + return 0; +} + +static void +ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io) +{ + struct ftl_band *band; + + memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE); + + if (ftl_rwb_entry_weak(entry)) { + band = ftl_band_from_ppa(io->dev, io->ppa); + entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos); + } + + entry->trace = io->trace; + + if (entry->md) { + memcpy(entry->md, &entry->lba, sizeof(io->lba)); + } +} + +static int +ftl_rwb_fill(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_entry *entry; + struct ftl_ppa ppa = { .cached = 1 }; + int flags = ftl_rwb_flags_from_io(io); + uint64_t lba; + + for (; io->pos < io->lbk_cnt; ++io->pos) { + lba = ftl_io_current_lba(io); + if (lba == FTL_LBA_INVALID) { + ftl_io_update_iovec(io, 1); + continue; + } + + entry = ftl_acquire_entry(dev, flags); + if (!entry) { + return -EAGAIN; + } + + entry->lba = lba; + ftl_rwb_entry_fill(entry, io); + + ppa.offset = entry->pos; + + ftl_io_update_iovec(io, 1); + ftl_update_l2p(dev, entry, ppa); + + /* Needs to be done after L2P is updated to avoid race with */ + /* write completion callback when it's processed faster than */ + /* L2P is set in update_l2p(). */ + ftl_rwb_push(entry); + ftl_trace(rwb_fill, ftl_dev_trace(dev), io); + } + + ftl_io_complete(io); + return 0; +} + +int +ftl_current_limit(const struct spdk_ftl_dev *dev) +{ + return dev->limit; +} + +int +spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs) +{ + if (!dev || !attrs) { + return -EINVAL; + } + + attrs->uuid = dev->uuid; + attrs->lbk_cnt = dev->num_lbas; + attrs->lbk_size = FTL_BLOCK_SIZE; + attrs->range = dev->range; + + return 0; +} + +static void +_ftl_io_write(void *ctx) +{ + ftl_io_write((struct ftl_io *)ctx); +} + +int +ftl_io_write(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + + /* For normal IOs we just need to copy the data onto the rwb */ + if (!(io->flags & FTL_IO_MD)) { + return ftl_rwb_fill(io); + } + + /* Metadata has its own buffer, so it doesn't have to be copied; just */ + /* send it to the core thread and schedule the write immediately */ + if (ftl_check_core_thread(dev)) { + return ftl_submit_write(ftl_wptr_from_band(io->band), io); + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io); + + return 0; +} + +static int +_spdk_ftl_write(struct ftl_io *io) +{ + int rc; + + rc = ftl_io_write(io); + if (rc == -EAGAIN) { + spdk_thread_send_msg(spdk_io_channel_get_thread(io->ch), + _ftl_write, io); + return 0; + } + + if (rc) { + ftl_io_free(io); + } + + return rc; +} + +static void +_ftl_write(void *ctx) +{ + _spdk_ftl_write(ctx); +} + +int +spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_io *io; + + if (!iov || !cb_fn || !dev) { + return -EINVAL; + } + + if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) { + return -EINVAL; + } + + if (lba_cnt == 0) { + return -EINVAL; + } + + if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + io = ftl_io_alloc(ch); + if (!io) { + return -ENOMEM; + } + + ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE); + return 
_spdk_ftl_write(io); +} + +int +ftl_io_read(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + ftl_next_ppa_fn next_ppa; + + if (ftl_check_read_thread(dev)) { + if (ftl_io_mode_ppa(io)) { + next_ppa = ftl_ppa_read_next_ppa; + } else { + next_ppa = ftl_lba_read_next_ppa; + } + + return ftl_submit_read(io, next_ppa, NULL); + } + + spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_read, io); + return 0; +} + +static void +_ftl_read(void *arg) +{ + ftl_io_read((struct ftl_io *)arg); +} + +int +spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_io *io; + + if (!iov || !cb_fn || !dev) { + return -EINVAL; + } + + if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) { + return -EINVAL; + } + + if (lba_cnt == 0) { + return -EINVAL; + } + + if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + io = ftl_io_alloc(ch); + if (!io) { + return -ENOMEM; + } + + ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ); + return ftl_io_read(io); +} + +static struct ftl_flush * +ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_flush *flush; + struct ftl_rwb *rwb = dev->rwb; + + flush = calloc(1, sizeof(*flush)); + if (!flush) { + return NULL; + } + + flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb)); + if (!flush->bmap) { + goto error; + } + + flush->dev = dev; + flush->cb.fn = cb_fn; + flush->cb.ctx = cb_arg; + + return flush; +error: + free(flush); + return NULL; +} + +static void +_ftl_flush(void *ctx) +{ + struct ftl_flush *flush = ctx; + struct spdk_ftl_dev *dev = flush->dev; + struct ftl_rwb *rwb = dev->rwb; + struct ftl_rwb_batch *batch; + + /* Attach flush object to all non-empty batches */ + ftl_rwb_foreach_batch(batch, rwb) { + if (!ftl_rwb_batch_empty(batch)) { + spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch)); + flush->num_req++; + } + } + + LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry); + + /* If the RWB was already empty, the flush can be completed right away */ + if (!flush->num_req) { + ftl_complete_flush(flush); + } +} + +int +spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_flush *flush; + + if (!dev || !cb_fn) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + flush = ftl_flush_init(dev, cb_fn, cb_arg); + if (!flush) { + return -ENOMEM; + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush); + return 0; +} + +int +ftl_task_read(void *ctx) +{ + struct ftl_thread *thread = ctx; + struct spdk_ftl_dev *dev = thread->dev; + struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev); + + if (dev->halt) { + if (ftl_shutdown_complete(dev)) { + spdk_poller_unregister(&thread->poller); + return 0; + } + } + + return spdk_nvme_qpair_process_completions(qpair, 1); +} + +int +ftl_task_core(void *ctx) +{ + struct ftl_thread *thread = ctx; + struct spdk_ftl_dev *dev = thread->dev; + struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev); + + if (dev->halt) { + if (ftl_shutdown_complete(dev)) { + spdk_poller_unregister(&thread->poller); + return 0; + } + } + + ftl_process_writes(dev); + spdk_nvme_qpair_process_completions(qpair, 1); + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE) diff --git a/lib/ftl/ftl_core.h b/lib/ftl/ftl_core.h index 26d8d0aff..24fa46a5b 100644 --- 
a/lib/ftl/ftl_core.h +++ b/lib/ftl/ftl_core.h @@ -221,7 +221,7 @@ int ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa); int ftl_task_core(void *ctx); int ftl_task_read(void *ctx); size_t ftl_tail_md_num_lbks(const struct spdk_ftl_dev *dev); -size_t ftl_tail_md_hdr_num_lbks(const struct spdk_ftl_dev *dev); +size_t ftl_tail_md_hdr_num_lbks(void); size_t ftl_vld_map_num_lbks(const struct spdk_ftl_dev *dev); size_t ftl_lba_map_num_lbks(const struct spdk_ftl_dev *dev); size_t ftl_head_md_num_lbks(const struct spdk_ftl_dev *dev); diff --git a/lib/ftl/ftl_debug.c b/lib/ftl/ftl_debug.c new file mode 100644 index 000000000..ca64de39f --- /dev/null +++ b/lib/ftl/ftl_debug.c @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk_internal/log.h" +#include "spdk/ftl.h" +#include "ftl_debug.h" + +#if defined(DEBUG) +#if defined(FTL_META_DEBUG) + +static const char *ftl_band_state_str[] = { + "free", + "prep", + "opening", + "open", + "full", + "closing", + "closed", + "max" +}; + +bool +ftl_band_validate_md(struct ftl_band *band, const uint64_t *lba_map) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + struct ftl_ppa ppa_md, ppa_l2p; + size_t i, size; + bool valid = true; + + size = ftl_num_band_lbks(dev); + + pthread_spin_lock(&md->lock); + for (i = 0; i < size; ++i) { + if (!spdk_bit_array_get(md->vld_map, i)) { + continue; + } + + ppa_md = ftl_band_ppa_from_lbkoff(band, i); + ppa_l2p = ftl_l2p_get(dev, lba_map[i]); + + if (ppa_l2p.cached) { + continue; + } + + if (ppa_l2p.ppa != ppa_md.ppa) { + valid = false; + break; + } + + } + + pthread_spin_unlock(&md->lock); + + return valid; +} + +void +ftl_dev_dump_bands(struct spdk_ftl_dev *dev) +{ + size_t i, total = 0; + + if (!dev->bands) { + return; + } + + ftl_debug("Bands validity:\n"); + for (i = 0; i < ftl_dev_num_bands(dev); ++i) { + if (dev->bands[i].state == FTL_BAND_STATE_FREE && + dev->bands[i].md.wr_cnt == 0) { + continue; + } + + if (!dev->bands[i].num_chunks) { + ftl_debug(" Band %3zu: all chunks are offline\n", i + 1); + continue; + } + + total += dev->bands[i].md.num_vld; + ftl_debug(" Band %3zu: %8zu / %zu \tnum_chunks: %zu \twr_cnt: %"PRIu64"\tmerit:" + "%10.3f\tstate: %s\n", + i + 1, dev->bands[i].md.num_vld, + ftl_band_user_lbks(&dev->bands[i]), + dev->bands[i].num_chunks, + dev->bands[i].md.wr_cnt, + dev->bands[i].merit, + ftl_band_state_str[dev->bands[i].state]); + } +} + +#endif /* defined(FTL_META_DEBUG) */ + +#if defined(FTL_DUMP_STATS) + +void +ftl_dev_dump_stats(const struct spdk_ftl_dev *dev) +{ + size_t i, total = 0; + char uuid[SPDK_UUID_STRING_LEN]; + double waf; + const char *limits[] = { + [SPDK_FTL_LIMIT_CRIT] = "crit", + [SPDK_FTL_LIMIT_HIGH] = "high", + [SPDK_FTL_LIMIT_LOW] = "low", + [SPDK_FTL_LIMIT_START] = "start" + }; + + if (!dev->bands) { + return; + } + + /* Count the number of valid LBAs */ + for (i = 0; i < ftl_dev_num_bands(dev); ++i) { + total += dev->bands[i].md.num_vld; + } + + waf = (double)dev->stats.write_total / (double)dev->stats.write_user; + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), &dev->uuid); + ftl_debug("\n"); + ftl_debug("device UUID: %s\n", uuid); + ftl_debug("total valid LBAs: %zu\n", total); + ftl_debug("total writes: %"PRIu64"\n", dev->stats.write_total); + ftl_debug("user writes: %"PRIu64"\n", dev->stats.write_user); + ftl_debug("WAF: %.4lf\n", waf); + ftl_debug("limits:\n"); + for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) { + ftl_debug(" %5s: %"PRIu64"\n", limits[i], dev->stats.limits[i]); + } +} + +#endif /* defined(FTL_DUMP_STATS) */ +#endif /* defined(DEBUG) */ diff --git a/lib/ftl/ftl_debug.h b/lib/ftl/ftl_debug.h new file mode 100644 index 000000000..c46b3f842 --- /dev/null +++ b/lib/ftl/ftl_debug.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FTL_DEBUG_H +#define FTL_DEBUG_H + +#include "ftl_ppa.h" +#include "ftl_band.h" +#include "ftl_core.h" +#include "ftl_rwb.h" + +#if defined(DEBUG) +/* Debug flags - enabled when defined */ +#define FTL_META_DEBUG 1 +#define FTL_DUMP_STATS 1 + +#define ftl_debug(msg, ...) \ + fprintf(stderr, msg, ## __VA_ARGS__) +#else +#define ftl_debug(msg, ...) +#endif + +static inline const char * +ftl_ppa2str(struct ftl_ppa ppa, char *buf, size_t size) +{ + snprintf(buf, size, "(grp: %u, pu: %u, chk: %u, lbk: %u)", + ppa.grp, ppa.pu, ppa.chk, ppa.lbk); + return buf; +} + +#if defined(FTL_META_DEBUG) +bool ftl_band_validate_md(struct ftl_band *band, const uint64_t *lba_map); +void ftl_dev_dump_bands(struct spdk_ftl_dev *dev); +#else +#define ftl_band_validate_md(band, lba_map) +#define ftl_dev_dump_bands(dev) +#endif + +#if defined(FTL_DUMP_STATS) +void ftl_dev_dump_stats(const struct spdk_ftl_dev *dev); +#else +#define ftl_dev_dump_stats(dev) +#endif + +#endif /* FTL_DEBUG_H */ diff --git a/lib/ftl/ftl_io.c b/lib/ftl/ftl_io.c new file mode 100644 index 000000000..a93c00b48 --- /dev/null +++ b/lib/ftl/ftl_io.c @@ -0,0 +1,371 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/ftl.h" + +#include "ftl_io.h" +#include "ftl_core.h" +#include "ftl_rwb.h" +#include "ftl_band.h" + +size_t +ftl_io_inc_req(struct ftl_io *io) +{ + struct ftl_band *band = io->band; + + if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) { + ftl_band_acquire_md(band); + } + + __atomic_fetch_add(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST); + + return ++io->req_cnt; +} + +size_t +ftl_io_dec_req(struct ftl_io *io) +{ + struct ftl_band *band = io->band; + unsigned long num_inflight __attribute__((unused)); + + if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) { + ftl_band_release_md(band); + } + + num_inflight = __atomic_fetch_sub(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST); + + assert(num_inflight > 0); + assert(io->req_cnt > 0); + + return --io->req_cnt; +} + +struct iovec * +ftl_io_iovec(struct ftl_io *io) +{ + if (io->iov_cnt > 1) { + return io->iovs; + } else { + return &io->iov; + } +} + +uint64_t +ftl_io_current_lba(struct ftl_io *io) +{ + if (io->flags & FTL_IO_VECTOR_LBA) { + return io->lbas[io->pos]; + } else { + return io->lba + io->pos; + } +} + +void +ftl_io_update_iovec(struct ftl_io *io, size_t lbk_cnt) +{ + struct iovec *iov = ftl_io_iovec(io); + size_t iov_lbks; + + while (lbk_cnt > 0) { + assert(io->iov_pos < io->iov_cnt); + iov_lbks = iov[io->iov_pos].iov_len / PAGE_SIZE; + + if (io->iov_off + lbk_cnt < iov_lbks) { + io->iov_off += lbk_cnt; + break; + } + + assert(iov_lbks > io->iov_off); + lbk_cnt -= (iov_lbks - io->iov_off); + io->iov_off = 0; + io->iov_pos++; + } +} + +size_t +ftl_iovec_num_lbks(struct iovec *iov, size_t iov_cnt) +{ + size_t lbks = 0, i = 0; + + for (; i < iov_cnt; ++i) { + lbks += iov[i].iov_len / PAGE_SIZE; + } + + return lbks; +} + +void * +ftl_io_iovec_addr(struct ftl_io *io) +{ + assert(io->iov_pos < io->iov_cnt); + assert(io->iov_off * PAGE_SIZE < ftl_io_iovec(io)[io->iov_pos].iov_len); + + return (char *)ftl_io_iovec(io)[io->iov_pos].iov_base + + io->iov_off * PAGE_SIZE; +} + +size_t +ftl_io_iovec_len_left(struct ftl_io *io) +{ + struct iovec *iov = ftl_io_iovec(io); + return iov[io->iov_pos].iov_len / PAGE_SIZE - io->iov_off; +} + +int +ftl_io_init_iovec(struct ftl_io *io, void *buf, + size_t iov_cnt, size_t req_size) +{ + struct iovec *iov; + size_t i; + + if (iov_cnt > 1) { + iov = io->iovs = calloc(iov_cnt, sizeof(*iov)); + if (!iov) { + return -ENOMEM; + } + } else { + iov = &io->iov; + } + + io->iov_pos = 0; + io->iov_cnt = iov_cnt; + for (i = 0; i < iov_cnt; ++i) { + iov[i].iov_base = (char *)buf + i * req_size * PAGE_SIZE; + iov[i].iov_len = req_size * PAGE_SIZE; + } + + return 0; +} + +static void +ftl_io_init(struct ftl_io *io, struct spdk_ftl_dev *dev, + spdk_ftl_fn fn, void *ctx, int flags, int type) +{ + io->flags |= flags | FTL_IO_INITIALIZED; + io->type = type; + io->dev = dev; + io->lba = FTL_LBA_INVALID; + io->cb.fn = fn; + io->cb.ctx = ctx; + io->trace = ftl_trace_alloc_group(dev->stats.trace); +} + +struct ftl_io * 
+ftl_io_init_internal(const struct ftl_io_init_opts *opts) +{ + struct ftl_io *io = opts->io; + struct spdk_ftl_dev *dev = opts->dev; + + if (!io) { + io = ftl_io_alloc(dev->ioch); + if (!io) { + return NULL; + } + } + + ftl_io_clear(io); + ftl_io_init(io, dev, opts->fn, io, opts->flags | FTL_IO_INTERNAL, opts->type); + + io->lbk_cnt = opts->iov_cnt * opts->req_size; + io->rwb_batch = opts->rwb_batch; + io->band = opts->band; + io->md = opts->md; + + if (ftl_io_init_iovec(io, opts->data, opts->iov_cnt, opts->req_size)) { + if (!opts->io) { + ftl_io_free(io); + } + return NULL; + } + + return io; +} + +struct ftl_io * +ftl_io_rwb_init(struct spdk_ftl_dev *dev, struct ftl_band *band, + struct ftl_rwb_batch *batch, spdk_ftl_fn cb) +{ + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = batch, + .band = band, + .size = sizeof(struct ftl_io), + .flags = 0, + .type = FTL_IO_WRITE, + .iov_cnt = 1, + .req_size = dev->xfer_size, + .fn = cb, + .data = ftl_rwb_batch_get_data(batch), + .md = ftl_rwb_batch_get_md(batch), + }; + + return ftl_io_init_internal(&opts); +} + +struct ftl_io * +ftl_io_erase_init(struct ftl_band *band, size_t lbk_cnt, spdk_ftl_fn cb) +{ + struct ftl_io *io; + struct ftl_io_init_opts opts = { + .dev = band->dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(struct ftl_io), + .flags = FTL_IO_PPA_MODE, + .type = FTL_IO_ERASE, + .iov_cnt = 0, + .req_size = 1, + .fn = cb, + .data = NULL, + .md = NULL, + }; + + io = ftl_io_init_internal(&opts); + io->lbk_cnt = lbk_cnt; + + return io; +} + +void +ftl_io_user_init(struct spdk_ftl_dev *dev, struct ftl_io *io, uint64_t lba, size_t lbk_cnt, + struct iovec *iov, size_t iov_cnt, + spdk_ftl_fn cb_fn, void *cb_arg, int type) +{ + if (io->flags & FTL_IO_INITIALIZED) { + return; + } + + ftl_io_init(io, dev, cb_fn, cb_arg, 0, type); + + io->lba = lba; + io->lbk_cnt = lbk_cnt; + io->iov_cnt = iov_cnt; + + if (iov_cnt > 1) { + io->iovs = iov; + } else { + io->iov = *iov; + } + + ftl_trace(lba_io_init, ftl_dev_trace(io->dev), io); +} + +void +ftl_io_complete(struct ftl_io *io) +{ + int keep_alive = io->flags & FTL_IO_KEEP_ALIVE; + + io->flags &= ~FTL_IO_INITIALIZED; + io->cb.fn(io->cb.ctx, io->status); + + if (!keep_alive) { + ftl_io_free(io); + } +} + +void +ftl_io_process_error(struct ftl_io *io, const struct spdk_nvme_cpl *status) +{ + io->status = -EIO; + + /* TODO: add error handling for specific cases */ + if (status->status.sct == SPDK_NVME_SCT_MEDIA_ERROR && + status->status.sc == SPDK_OCSSD_SC_READ_HIGH_ECC) { + io->status = 0; + } +} + +void * +ftl_io_get_md(const struct ftl_io *io) +{ + if (!io->md) { + return NULL; + } + + return (char *)io->md + io->pos * FTL_BLOCK_SIZE; +} + +struct ftl_io * +ftl_io_alloc(struct spdk_io_channel *ch) +{ + struct ftl_io *io; + struct ftl_io_channel *ioch = spdk_io_channel_get_ctx(ch); + + io = spdk_mempool_get(ioch->io_pool); + if (!io) { + return NULL; + } + + memset(io, 0, ioch->elem_size); + io->ch = ch; + return io; +} + +void +ftl_io_reinit(struct ftl_io *io, spdk_ftl_fn fn, void *ctx, int flags, int type) +{ + ftl_io_clear(io); + ftl_io_init(io, io->dev, fn, ctx, flags, type); +} + +void +ftl_io_clear(struct ftl_io *io) +{ + io->pos = 0; + io->req_cnt = 0; + io->iov_pos = 0; + io->iov_off = 0; + io->flags = 0; + io->rwb_batch = NULL; + io->band = NULL; +} + +void +ftl_io_free(struct ftl_io *io) +{ + struct ftl_io_channel *ioch; + + if (!io) { + return; + } + + if ((io->flags & FTL_IO_INTERNAL) && io->iov_cnt > 1) { + free(io->iovs); + } + + ioch = 
spdk_io_channel_get_ctx(io->ch); + spdk_mempool_put(ioch->io_pool, io); +} diff --git a/lib/ftl/ftl_io.h b/lib/ftl/ftl_io.h index b5f0f9409..afa293251 100644 --- a/lib/ftl/ftl_io.h +++ b/lib/ftl/ftl_io.h @@ -34,9 +34,10 @@ #ifndef FTL_IO_H #define FTL_IO_H -#include -#include -#include +#include "spdk/stdinc.h" +#include "spdk/nvme.h" +#include "spdk/ftl.h" + #include "ftl_ppa.h" #include "ftl_trace.h" @@ -250,8 +251,6 @@ void *ftl_io_iovec_addr(struct ftl_io *io); size_t ftl_io_iovec_len_left(struct ftl_io *io); int ftl_io_init_iovec(struct ftl_io *io, void *buf, size_t iov_cnt, size_t req_size); -void ftl_io_init(struct ftl_io *io, struct spdk_ftl_dev *dev, - spdk_ftl_fn cb, void *ctx, int flags, int type); struct ftl_io *ftl_io_init_internal(const struct ftl_io_init_opts *opts); struct ftl_io *ftl_io_rwb_init(struct spdk_ftl_dev *dev, struct ftl_band *band, struct ftl_rwb_batch *entry, spdk_ftl_fn cb); diff --git a/lib/ftl/ftl_ppa.h b/lib/ftl/ftl_ppa.h index c4e11372f..6c620ab75 100644 --- a/lib/ftl/ftl_ppa.h +++ b/lib/ftl/ftl_ppa.h @@ -34,7 +34,7 @@ #ifndef FTL_PPA_H #define FTL_PPA_H -#include +#include "spdk/stdinc.h" /* Marks PPA as invalid */ #define FTL_PPA_INVALID (-1) diff --git a/lib/ftl/ftl_rwb.c b/lib/ftl/ftl_rwb.c new file mode 100644 index 000000000..478f84a14 --- /dev/null +++ b/lib/ftl/ftl_rwb.c @@ -0,0 +1,461 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#include "ftl_rwb.h" +#include "ftl_core.h" + +struct ftl_rwb_batch { + /* Parent RWB */ + struct ftl_rwb *rwb; + + /* Position within RWB */ + unsigned int pos; + + /* Number of acquired entries */ + unsigned int num_acquired; + + /* Number of entries ready for submission */ + unsigned int num_ready; + + /* RWB entry list */ + LIST_HEAD(, ftl_rwb_entry) entry_list; + + /* Entry buffer */ + struct ftl_rwb_entry *entries; + + /* Data buffer */ + void *buffer; + + /* Metadata buffer */ + void *md_buffer; + + /* Queue entry */ + STAILQ_ENTRY(ftl_rwb_batch) stailq; +}; + +struct ftl_rwb { + /* Number of batches */ + size_t num_batches; + + /* Number of entries per batch */ + size_t xfer_size; + + /* Metadata's size */ + size_t md_size; + + /* Number of acquired entries */ + unsigned int num_acquired[FTL_RWB_TYPE_MAX]; + + /* User/internal limits */ + size_t limits[FTL_RWB_TYPE_MAX]; + + /* Current batch */ + struct ftl_rwb_batch *current; + + /* Free batch queue */ + STAILQ_HEAD(, ftl_rwb_batch) free_queue; + + /* Submission batch queue */ + struct spdk_ring *submit_queue; + + /* Batch buffer */ + struct ftl_rwb_batch *batches; + + /* RWB lock */ + pthread_spinlock_t lock; +}; + +static int +ftl_rwb_batch_full(const struct ftl_rwb_batch *batch, size_t batch_size) +{ + struct ftl_rwb *rwb = batch->rwb; + assert(batch_size <= rwb->xfer_size); + return batch_size == rwb->xfer_size; +} + +static void +ftl_rwb_batch_init_entry(struct ftl_rwb_batch *batch, size_t pos) +{ + struct ftl_rwb *rwb = batch->rwb; + struct ftl_rwb_entry *entry, *prev; + size_t batch_offset = pos % rwb->xfer_size; + + entry = &batch->entries[batch_offset]; + entry->pos = pos; + entry->data = ((char *)batch->buffer) + FTL_BLOCK_SIZE * batch_offset; + entry->md = rwb->md_size ? 
((char *)batch->md_buffer) + rwb->md_size * batch_offset : NULL; + entry->batch = batch; + entry->rwb = batch->rwb; + pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE); + + if (batch_offset > 0) { + prev = &batch->entries[batch_offset - 1]; + LIST_INSERT_AFTER(prev, entry, list_entry); + } else { + LIST_INSERT_HEAD(&batch->entry_list, entry, list_entry); + } +} + +static int +ftl_rwb_batch_init(struct ftl_rwb *rwb, struct ftl_rwb_batch *batch, unsigned int pos) +{ + size_t md_size, i; + + md_size = spdk_divide_round_up(rwb->md_size * rwb->xfer_size, FTL_BLOCK_SIZE) * + FTL_BLOCK_SIZE; + batch->rwb = rwb; + batch->pos = pos; + + batch->entries = calloc(rwb->xfer_size, sizeof(*batch->entries)); + if (!batch->entries) { + return -1; + } + + batch->buffer = spdk_dma_zmalloc(FTL_BLOCK_SIZE * rwb->xfer_size, + FTL_BLOCK_SIZE, NULL); + if (!batch->buffer) { + goto error; + } + + if (md_size > 0) { + batch->md_buffer = spdk_dma_zmalloc(md_size, FTL_BLOCK_SIZE, NULL); + if (!batch->md_buffer) { + goto error; + } + } + + LIST_INIT(&batch->entry_list); + + for (i = 0; i < rwb->xfer_size; ++i) { + ftl_rwb_batch_init_entry(batch, pos * rwb->xfer_size + i); + } + + return 0; +error: + free(batch->entries); + spdk_dma_free(batch->buffer); + return -1; +} + +struct ftl_rwb * +ftl_rwb_init(const struct spdk_ftl_conf *conf, size_t xfer_size, size_t md_size) +{ + struct ftl_rwb *rwb; + struct ftl_rwb_batch *batch; + size_t ring_size, i; + + rwb = calloc(1, sizeof(*rwb)); + if (!rwb) { + goto error; + } + + assert(conf->rwb_size % xfer_size == 0); + + rwb->xfer_size = xfer_size; + rwb->md_size = md_size; + rwb->num_batches = conf->rwb_size / (FTL_BLOCK_SIZE * xfer_size); + + ring_size = spdk_align32pow2(rwb->num_batches); + + rwb->batches = calloc(rwb->num_batches, sizeof(*rwb->batches)); + if (!rwb->batches) { + goto error; + } + + rwb->submit_queue = spdk_ring_create(SPDK_RING_TYPE_MP_SC, ring_size, + SPDK_ENV_SOCKET_ID_ANY); + if (!rwb->submit_queue) { + SPDK_ERRLOG("Failed to create submission queue\n"); + goto error; + } + + /* TODO: use rte_ring with SP / MC */ + STAILQ_INIT(&rwb->free_queue); + + for (i = 0; i < rwb->num_batches; ++i) { + batch = &rwb->batches[i]; + + if (ftl_rwb_batch_init(rwb, batch, i)) { + SPDK_ERRLOG("Failed to initialize RWB entry buffer\n"); + goto error; + } + + STAILQ_INSERT_TAIL(&rwb->free_queue, batch, stailq); + } + + for (unsigned int i = 0; i < FTL_RWB_TYPE_MAX; ++i) { + rwb->limits[i] = ftl_rwb_entry_cnt(rwb); + } + + pthread_spin_init(&rwb->lock, PTHREAD_PROCESS_PRIVATE); + return rwb; +error: + ftl_rwb_free(rwb); + return NULL; +} + +void +ftl_rwb_free(struct ftl_rwb *rwb) +{ + struct ftl_rwb_entry *entry; + struct ftl_rwb_batch *batch; + + if (!rwb) { + return; + } + + for (size_t i = 0; i < rwb->num_batches; ++i) { + batch = &rwb->batches[i]; + + ftl_rwb_foreach(entry, batch) { + pthread_spin_destroy(&entry->lock); + } + + spdk_dma_free(batch->buffer); + spdk_dma_free(batch->md_buffer); + free(batch->entries); + } + + pthread_spin_destroy(&rwb->lock); + spdk_ring_free(rwb->submit_queue); + free(rwb->batches); + free(rwb); +} + +void +ftl_rwb_batch_release(struct ftl_rwb_batch *batch) +{ + struct ftl_rwb *rwb = batch->rwb; + struct ftl_rwb_entry *entry; + unsigned int num_acquired __attribute__((unused)); + + batch->num_ready = 0; + batch->num_acquired = 0; + + ftl_rwb_foreach(entry, batch) { + num_acquired = __atomic_fetch_sub(&rwb->num_acquired[ftl_rwb_entry_type(entry)], 1, + __ATOMIC_SEQ_CST); + assert(num_acquired > 0); + } + + 
pthread_spin_lock(&rwb->lock); + STAILQ_INSERT_TAIL(&rwb->free_queue, batch, stailq); + pthread_spin_unlock(&rwb->lock); +} + +size_t +ftl_rwb_entry_cnt(const struct ftl_rwb *rwb) +{ + return rwb->num_batches * rwb->xfer_size; +} + +size_t +ftl_rwb_num_batches(const struct ftl_rwb *rwb) +{ + return rwb->num_batches; +} + +size_t +ftl_rwb_batch_get_offset(const struct ftl_rwb_batch *batch) +{ + return batch->pos; +} + +void +ftl_rwb_set_limits(struct ftl_rwb *rwb, + const size_t limit[FTL_RWB_TYPE_MAX]) +{ + assert(limit[FTL_RWB_TYPE_USER] <= ftl_rwb_entry_cnt(rwb)); + assert(limit[FTL_RWB_TYPE_INTERNAL] <= ftl_rwb_entry_cnt(rwb)); + memcpy(rwb->limits, limit, sizeof(rwb->limits)); +} + +void +ftl_rwb_get_limits(struct ftl_rwb *rwb, + size_t limit[FTL_RWB_TYPE_MAX]) +{ + memcpy(limit, rwb->limits, sizeof(rwb->limits)); +} + +size_t +ftl_rwb_num_acquired(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + return __atomic_load_n(&rwb->num_acquired[type], __ATOMIC_SEQ_CST); +} + +void +ftl_rwb_batch_revert(struct ftl_rwb_batch *batch) +{ + struct ftl_rwb *rwb = batch->rwb; + + if (spdk_ring_enqueue(rwb->submit_queue, (void **)&batch, 1) != 1) { + assert(0 && "Should never happen"); + } +} + +void +ftl_rwb_push(struct ftl_rwb_entry *entry) +{ + struct ftl_rwb_batch *batch = entry->batch; + struct ftl_rwb *rwb = batch->rwb; + size_t batch_size; + + batch_size = __atomic_fetch_add(&batch->num_ready, 1, __ATOMIC_SEQ_CST) + 1; + + /* Once all of the entries are put back, push the batch on the */ + /* submission queue */ + if (ftl_rwb_batch_full(batch, batch_size)) { + if (spdk_ring_enqueue(rwb->submit_queue, (void **)&batch, 1) != 1) { + assert(0 && "Should never happen"); + } + } +} + +static int +ftl_rwb_check_limits(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + return ftl_rwb_num_acquired(rwb, type) >= rwb->limits[type]; +} + +struct ftl_rwb_entry * +ftl_rwb_acquire(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + struct ftl_rwb_entry *entry = NULL; + struct ftl_rwb_batch *current; + + if (ftl_rwb_check_limits(rwb, type)) { + return NULL; + } + + pthread_spin_lock(&rwb->lock); + + current = rwb->current; + if (!current) { + current = STAILQ_FIRST(&rwb->free_queue); + if (!current) { + goto error; + } + + STAILQ_REMOVE(&rwb->free_queue, current, ftl_rwb_batch, stailq); + rwb->current = current; + } + + entry = ¤t->entries[current->num_acquired++]; + + /* If the whole batch is filled, clear the current batch pointer */ + if (current->num_acquired >= rwb->xfer_size) { + rwb->current = NULL; + } + + pthread_spin_unlock(&rwb->lock); + __atomic_fetch_add(&rwb->num_acquired[type], 1, __ATOMIC_SEQ_CST); + return entry; +error: + pthread_spin_unlock(&rwb->lock); + return NULL; +} + +struct ftl_rwb_batch * +ftl_rwb_pop(struct ftl_rwb *rwb) +{ + struct ftl_rwb_batch *batch = NULL; + + if (spdk_ring_dequeue(rwb->submit_queue, (void **)&batch, 1) != 1) { + return NULL; + } + + return batch; +} + +static struct ftl_rwb_batch * +_ftl_rwb_next_batch(struct ftl_rwb *rwb, size_t pos) +{ + if (pos >= rwb->num_batches) { + return NULL; + } + + return &rwb->batches[pos]; +} + +struct ftl_rwb_batch * +ftl_rwb_next_batch(struct ftl_rwb_batch *batch) +{ + return _ftl_rwb_next_batch(batch->rwb, batch->pos + 1); +} + +struct ftl_rwb_batch * +ftl_rwb_first_batch(struct ftl_rwb *rwb) +{ + return _ftl_rwb_next_batch(rwb, 0); +} + +int +ftl_rwb_batch_empty(struct ftl_rwb_batch *batch) +{ + return __atomic_load_n(&batch->num_ready, __ATOMIC_SEQ_CST) == 0; +} + +void * +ftl_rwb_batch_get_data(struct 
ftl_rwb_batch *batch) +{ + return batch->buffer; +} + +void * +ftl_rwb_batch_get_md(struct ftl_rwb_batch *batch) +{ + return batch->md_buffer; +} + +struct ftl_rwb_entry * +ftl_rwb_entry_from_offset(struct ftl_rwb *rwb, size_t offset) +{ + unsigned int b_off, e_off; + + b_off = offset / rwb->xfer_size; + e_off = offset % rwb->xfer_size; + + assert(b_off < rwb->num_batches); + + return &rwb->batches[b_off].entries[e_off]; +} + +struct ftl_rwb_entry * +ftl_rwb_batch_first_entry(struct ftl_rwb_batch *batch) +{ + return LIST_FIRST(&batch->entry_list); +} diff --git a/lib/ftl/ftl_rwb.h b/lib/ftl/ftl_rwb.h new file mode 100644 index 000000000..406bf5de6 --- /dev/null +++ b/lib/ftl/ftl_rwb.h @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef FTL_RWB_H +#define FTL_RWB_H + +#include "spdk/stdinc.h" +#include "spdk/queue.h" + +#include "ftl_io.h" +#include "ftl_ppa.h" +#include "ftl_trace.h" + +struct ftl_rwb; +struct spdk_ftl_conf; +struct ftl_rwb_batch; + +enum ftl_rwb_entry_type { + FTL_RWB_TYPE_INTERNAL, + FTL_RWB_TYPE_USER, + FTL_RWB_TYPE_MAX +}; + +/* Write buffer entry */ +struct ftl_rwb_entry { + /* Owner rwb */ + struct ftl_rwb *rwb; + + /* Batch containing the entry */ + struct ftl_rwb_batch *batch; + + /* Logical address */ + uint64_t lba; + + /* Physical address */ + struct ftl_ppa ppa; + + /* Position within the rwb's buffer */ + unsigned int pos; + + /* Data pointer */ + void *data; + + /* Metadata pointer */ + void *md; + + /* Data/state lock */ + pthread_spinlock_t lock; + + /* Flags */ + unsigned int flags; + + /* Indicates whether the entry is part of cache and is assigned a PPA */ + bool valid; + + /* Trace group id */ + ftl_trace_group_t trace; + + /* Batch list entry */ + LIST_ENTRY(ftl_rwb_entry) list_entry; +}; + +struct ftl_rwb *ftl_rwb_init(const struct spdk_ftl_conf *conf, size_t xfer_size, size_t md_size); +void ftl_rwb_free(struct ftl_rwb *rwb); +void ftl_rwb_batch_release(struct ftl_rwb_batch *batch); +void ftl_rwb_push(struct ftl_rwb_entry *entry); +size_t ftl_rwb_entry_cnt(const struct ftl_rwb *rwb); +void ftl_rwb_set_limits(struct ftl_rwb *rwb, const size_t limit[FTL_RWB_TYPE_MAX]); +void ftl_rwb_get_limits(struct ftl_rwb *rwb, size_t limit[FTL_RWB_TYPE_MAX]); +size_t ftl_rwb_num_acquired(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type); +size_t ftl_rwb_num_batches(const struct ftl_rwb *rwb); +struct ftl_rwb_entry *ftl_rwb_acquire(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type); +struct ftl_rwb_batch *ftl_rwb_pop(struct ftl_rwb *rwb); +struct ftl_rwb_batch *ftl_rwb_first_batch(struct ftl_rwb *rwb); +struct ftl_rwb_batch *ftl_rwb_next_batch(struct ftl_rwb_batch *batch); +int ftl_rwb_batch_empty(struct ftl_rwb_batch *batch); +struct ftl_rwb_entry *ftl_rwb_entry_from_offset(struct ftl_rwb *rwb, size_t offset); +size_t ftl_rwb_batch_get_offset(const struct ftl_rwb_batch *batch); +void ftl_rwb_batch_revert(struct ftl_rwb_batch *batch); +struct ftl_rwb_entry *ftl_rwb_batch_first_entry(struct ftl_rwb_batch *batch); +void *ftl_rwb_batch_get_data(struct ftl_rwb_batch *batch); +void *ftl_rwb_batch_get_md(struct ftl_rwb_batch *batch); + +static inline void +_ftl_rwb_entry_set_valid(struct ftl_rwb_entry *entry, bool valid) +{ + __atomic_store_n(&entry->valid, valid, __ATOMIC_SEQ_CST); +} + +static inline void +ftl_rwb_entry_set_valid(struct ftl_rwb_entry *entry) +{ + _ftl_rwb_entry_set_valid(entry, true); +} + +static inline void +ftl_rwb_entry_invalidate(struct ftl_rwb_entry *entry) +{ + _ftl_rwb_entry_set_valid(entry, false); +} + +static inline int +ftl_rwb_entry_valid(struct ftl_rwb_entry *entry) +{ + return __atomic_load_n(&entry->valid, __ATOMIC_SEQ_CST); +} + +static inline enum ftl_rwb_entry_type +ftl_rwb_type_from_flags(int flags) { + return (flags & FTL_IO_INTERNAL) ? 
FTL_RWB_TYPE_INTERNAL : FTL_RWB_TYPE_USER; +} + +static inline enum ftl_rwb_entry_type +ftl_rwb_entry_type(const struct ftl_rwb_entry *entry) { + return ftl_rwb_type_from_flags(entry->flags); +} + +static inline int +ftl_rwb_entry_internal(const struct ftl_rwb_entry *entry) +{ + return ftl_rwb_entry_type(entry) == FTL_RWB_TYPE_INTERNAL; +} + +#define ftl_rwb_foreach(entry, batch) \ + for (entry = ftl_rwb_batch_first_entry(batch); \ + entry; entry = LIST_NEXT(entry, list_entry)) + +#define ftl_rwb_foreach_batch(batch, rwb) \ + for (batch = ftl_rwb_first_batch(rwb); batch; \ + batch = ftl_rwb_next_batch(batch)) + +#endif /* FTL_RWB_H */ diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk index dd16a0123..c15e0ea5e 100644 --- a/mk/spdk.modules.mk +++ b/mk/spdk.modules.mk @@ -66,6 +66,10 @@ BLOCKDEV_MODULES_LIST += bdev_pmem SYS_LIBS += -lpmemblk endif +ifeq ($(CONFIG_FTL),y) +BLOCKDEV_MODULES_LIST += ftl +endif + SOCK_MODULES_LIST = sock_posix ifeq ($(CONFIG_VPP),y) diff --git a/test/common/autotest_common.sh b/test/common/autotest_common.sh index 91b170230..0fad11fe8 100644 --- a/test/common/autotest_common.sh +++ b/test/common/autotest_common.sh @@ -60,6 +60,7 @@ fi : ${SPDK_RUN_UBSAN=1}; export SPDK_RUN_UBSAN : ${SPDK_RUN_INSTALLED_DPDK=1}; export SPDK_RUN_INSTALLED_DPDK : ${SPDK_TEST_CRYPTO=1}; export SPDK_TEST_CRYPTO +: ${SPDK_TEST_FTL=0}; export SPDK_TEST_FTL if [ -z "$DEPENDENCY_DIR" ]; then export DEPENDENCY_DIR=/home/sys_sgsw @@ -187,6 +188,10 @@ if [ ! -d "${DEPENDENCY_DIR}/nvme-cli" ]; then export SPDK_TEST_NVME_CLI=0 fi +if [ $SPDK_TEST_FTL -eq 1 ]; then + config_params+=' --with-ftl' +fi + export config_params if [ -z "$output_dir" ]; then