diff --git a/CONFIG b/CONFIG index 3104212a2..31bd63422 100644 --- a/CONFIG +++ b/CONFIG @@ -118,3 +118,6 @@ CONFIG_VTUNE_DIR= # Build the dpdk igb_uio driver CONFIG_IGB_UIO_DRIVER=n + +# Build FTL library +CONFIG_FTL=n diff --git a/configure b/configure index cba938dde..c8867b66b 100755 --- a/configure +++ b/configure @@ -263,6 +263,12 @@ for i in "$@"; do --without-igb-uio-driver) CONFIG[IGB_UIO_DRIVER]=n ;; + --with-ftl) + CONFIG[FTL]=y + ;; + --without-ftl) + CONFIG[FTL]=n + ;; --) break ;; diff --git a/lib/Makefile b/lib/Makefile index 5582ef6ab..40c2df08d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -44,6 +44,7 @@ DIRS-$(CONFIG_VIRTIO) += virtio endif DIRS-$(CONFIG_REDUCE) += reduce +DIRS-$(CONFIG_FTL) += ftl # If CONFIG_ENV is pointing at a directory in lib, build it. # Out-of-tree env implementations must be built separately by the user. diff --git a/lib/ftl/Makefile b/lib/ftl/Makefile new file mode 100644 index 000000000..e64a7d85f --- /dev/null +++ b/lib/ftl/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = ftl_band.c ftl_core.c ftl_debug.c ftl_io.c ftl_rwb.c +LIBNAME = ftl + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/ftl/ftl_band.c b/lib/ftl/ftl_band.c new file mode 100644 index 000000000..575b1de67 --- /dev/null +++ b/lib/ftl/ftl_band.c @@ -0,0 +1,969 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/crc32.h" +#include "spdk/likely.h" +#include "spdk/util.h" +#include "spdk/ftl.h" + +#include "ftl_band.h" +#include "ftl_io.h" +#include "ftl_core.h" +#include "ftl_debug.h" + +/* TODO: define some signature for meta version */ +#define FTL_MD_VER 1 + +struct __attribute__((packed)) ftl_md_hdr { + /* Device instance */ + struct spdk_uuid uuid; + + /* Meta version */ + uint8_t ver; + + /* Sequence number */ + uint64_t seq; + + /* CRC32 checksum */ + uint32_t checksum; +}; + +/* End metadata layout stored on media (with all three being aligned to block size): */ +/* - header */ +/* - valid bitmap */ +/* - LBA map */ +struct __attribute__((packed)) ftl_tail_md { + struct ftl_md_hdr hdr; + + /* Max number of lbks */ + uint64_t num_lbks; + + uint8_t reserved[4059]; +}; +SPDK_STATIC_ASSERT(sizeof(struct ftl_tail_md) == FTL_BLOCK_SIZE, "Incorrect metadata size"); + +struct __attribute__((packed)) ftl_head_md { + struct ftl_md_hdr hdr; + + /* Number of defrag cycles */ + uint64_t wr_cnt; + + /* Number of surfaced LBAs */ + uint64_t lba_cnt; + + /* Transfer size */ + uint32_t xfer_size; +}; + +size_t +ftl_tail_md_hdr_num_lbks(void) +{ + return spdk_divide_round_up(sizeof(struct ftl_tail_md), FTL_BLOCK_SIZE); +} + +size_t +ftl_vld_map_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_vld_map_size(dev), FTL_BLOCK_SIZE); +} + +size_t +ftl_lba_map_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_num_band_lbks(dev) * sizeof(uint64_t), FTL_BLOCK_SIZE); +} + +size_t +ftl_head_md_num_lbks(const struct spdk_ftl_dev *dev) +{ + return dev->xfer_size; +} + +size_t +ftl_tail_md_num_lbks(const struct spdk_ftl_dev *dev) +{ + return spdk_divide_round_up(ftl_tail_md_hdr_num_lbks() + + ftl_vld_map_num_lbks(dev) + + ftl_lba_map_num_lbks(dev), + dev->xfer_size) * dev->xfer_size; +} + +static uint64_t +ftl_band_tail_md_offset(struct ftl_band *band) +{ + return ftl_band_num_usable_lbks(band) - + ftl_tail_md_num_lbks(band->dev); +} + +int +ftl_band_full(struct ftl_band *band, size_t offset) +{ + return offset == ftl_band_tail_md_offset(band); +} + +void +ftl_band_write_failed(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + + band->high_prio = 1; + band->tail_md_ppa = ftl_to_ppa(FTL_PPA_INVALID); + + if (!dev->df_band) { + 
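+		/* Descriptive note (not in the original patch): remember the first band
+		 * that failed a write; flagged high_prio above, it becomes the defrag
+		 * candidate so its valid data can be relocated before reuse. */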
dev->df_band = band; + } + + ftl_band_set_state(band, FTL_BAND_STATE_CLOSED); +} + +void +ftl_band_clear_md(struct ftl_band *band) +{ + spdk_bit_array_clear_mask(band->md.vld_map); + memset(band->md.lba_map, 0, ftl_num_band_lbks(band->dev) * sizeof(uint64_t)); + band->md.num_vld = 0; +} + +static void +ftl_band_free_md(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + assert(band->state == FTL_BAND_STATE_CLOSED || + band->state == FTL_BAND_STATE_FREE); + assert(md->ref_cnt == 0); + assert(md->lba_map != NULL); + assert(!band->high_prio); + + /* Verify that band's metadata is consistent with l2p */ + if (band->num_chunks) { + assert(ftl_band_validate_md(band, band->md.lba_map) == true); + } + + spdk_mempool_put(dev->lba_pool, md->lba_map); + md->lba_map = NULL; +} + +static void +_ftl_band_set_free(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_band *lband, *prev; + + /* Verify band's previous state */ + assert(band->state == FTL_BAND_STATE_CLOSED); + + if (band == dev->df_band) { + dev->df_band = NULL; + } + + /* Remove the band from the closed band list */ + LIST_REMOVE(band, list_entry); + + /* Keep the list sorted by band's write count */ + LIST_FOREACH(lband, &dev->free_bands, list_entry) { + if (lband->md.wr_cnt > band->md.wr_cnt) { + LIST_INSERT_BEFORE(lband, band, list_entry); + break; + } + prev = lband; + } + + if (!lband) { + if (LIST_EMPTY(&dev->free_bands)) { + LIST_INSERT_HEAD(&dev->free_bands, band, list_entry); + } else { + LIST_INSERT_AFTER(prev, band, list_entry); + } + } + +#if defined(DEBUG) + prev = NULL; + LIST_FOREACH(lband, &dev->free_bands, list_entry) { + if (!prev) { + continue; + } + assert(prev->md.wr_cnt <= lband->md.wr_cnt); + } +#endif + dev->num_free++; + ftl_apply_limits(dev); +} + +static void +_ftl_band_set_opening(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + /* Verify band's previous state */ + assert(band->state == FTL_BAND_STATE_PREP); + LIST_REMOVE(band, list_entry); + + md->wr_cnt++; + + assert(dev->num_free > 0); + dev->num_free--; + + ftl_apply_limits(dev); +} + +static void +_ftl_band_set_closed(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_chunk *chunk; + + /* TODO: add this kind of check in band_set_state() */ + if (band->state == FTL_BAND_STATE_CLOSED) { + return; + } + + /* Set the state as free_md() checks for that */ + band->state = FTL_BAND_STATE_CLOSED; + + /* Free the md if there are no outstanding IOs */ + ftl_band_release_md(band); + + if (spdk_likely(band->num_chunks)) { + LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry); + CIRCLEQ_FOREACH(chunk, &band->chunks, circleq) { + chunk->state = FTL_CHUNK_STATE_CLOSED; + } + } else { + LIST_REMOVE(band, list_entry); + } +} + +static uint32_t +ftl_md_calc_crc(const struct ftl_md_hdr *hdr, size_t size) +{ + size_t checkoff = offsetof(struct ftl_md_hdr, checksum); + size_t mdoff = checkoff + sizeof(hdr->checksum); + uint32_t crc; + + crc = spdk_crc32c_update(hdr, checkoff, 0); + return spdk_crc32c_update((const char *)hdr + mdoff, size - mdoff, crc); +} + +static void +ftl_set_md_hdr(struct spdk_ftl_dev *dev, struct ftl_md_hdr *hdr, + struct ftl_md *md, size_t size) +{ + hdr->seq = md->seq; + hdr->ver = FTL_MD_VER; + hdr->uuid = dev->uuid; + hdr->checksum = ftl_md_calc_crc(hdr, size); +} + +static int +ftl_pack_head_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_head_md *head = data; + + 
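+	/* Serialize the head metadata (write count, number of surfaced LBAs and
+	 * transfer size) into the caller's buffer and seal it with the common
+	 * header (uuid, version, sequence number) plus the CRC32 checksum. */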
head->wr_cnt = md->wr_cnt; + head->lba_cnt = dev->num_lbas; + head->xfer_size = dev->xfer_size; + ftl_set_md_hdr(dev, &head->hdr, md, sizeof(struct ftl_head_md)); + + return FTL_MD_SUCCESS; +} + +static int +ftl_pack_tail_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_tail_md *tail = data; + size_t map_size; + void *vld_offset, *map_offset; + + map_size = ftl_num_band_lbks(dev) * sizeof(uint64_t); + vld_offset = (char *)data + ftl_tail_md_hdr_num_lbks() * FTL_BLOCK_SIZE; + map_offset = (char *)vld_offset + ftl_vld_map_num_lbks(dev) * FTL_BLOCK_SIZE; + + /* Clear out the buffer */ + memset(data, 0, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + tail->num_lbks = ftl_num_band_lbks(dev); + + pthread_spin_lock(&md->lock); + spdk_bit_array_store_mask(md->vld_map, vld_offset); + pthread_spin_unlock(&md->lock); + + memcpy(map_offset, md->lba_map, map_size); + ftl_set_md_hdr(dev, &tail->hdr, md, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + + return FTL_MD_SUCCESS; +} + +static int +ftl_md_hdr_vld(struct spdk_ftl_dev *dev, const struct ftl_md_hdr *hdr, size_t size) +{ + if (spdk_uuid_compare(&dev->uuid, &hdr->uuid) != 0) { + return FTL_MD_NO_MD; + } + + if (hdr->ver != FTL_MD_VER) { + return FTL_MD_INVALID_VER; + } + + if (ftl_md_calc_crc(hdr, size) != hdr->checksum) { + return FTL_MD_INVALID_CRC; + } + + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_tail_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_tail_md *tail = data; + size_t map_size; + void *vld_offset, *map_offset; + int rc; + + map_size = ftl_num_band_lbks(dev) * sizeof(uint64_t); + vld_offset = (char *)data + ftl_tail_md_hdr_num_lbks() * FTL_BLOCK_SIZE; + map_offset = (char *)vld_offset + ftl_vld_map_num_lbks(dev) * FTL_BLOCK_SIZE; + + rc = ftl_md_hdr_vld(dev, &tail->hdr, ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE); + if (rc) { + return rc; + } + + if (tail->num_lbks != ftl_num_band_lbks(dev)) { + return FTL_MD_INVALID_SIZE; + } + + if (md->vld_map) { + spdk_bit_array_load_mask(md->vld_map, vld_offset); + } + + if (md->lba_map) { + memcpy(md->lba_map, map_offset, map_size); + } + + md->seq = tail->hdr.seq; + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_lba_map(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + memcpy(md->lba_map, data, ftl_num_band_lbks(dev) * sizeof(uint64_t)); + return FTL_MD_SUCCESS; +} + +static int +ftl_unpack_head_md(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data) +{ + struct ftl_head_md *head = data; + int rc; + + rc = ftl_md_hdr_vld(dev, &head->hdr, sizeof(struct ftl_head_md)); + if (rc) { + return rc; + } + + md->seq = head->hdr.seq; + md->wr_cnt = head->wr_cnt; + + if (dev->global_md.num_lbas == 0) { + dev->global_md.num_lbas = head->lba_cnt; + } + + if (dev->global_md.num_lbas != head->lba_cnt) { + return FTL_MD_INVALID_SIZE; + } + + if (dev->xfer_size != head->xfer_size) { + return FTL_MD_INVALID_SIZE; + } + + return FTL_MD_SUCCESS; +} + +struct ftl_ppa +ftl_band_tail_md_ppa(struct ftl_band *band) +{ + struct ftl_ppa ppa; + struct ftl_chunk *chunk; + struct spdk_ftl_dev *dev = band->dev; + size_t xfer_size = dev->xfer_size; + size_t num_req = ftl_band_tail_md_offset(band) / xfer_size; + size_t i; + + if (spdk_unlikely(!band->num_chunks)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + /* Metadata should be aligned to xfer size */ + assert(ftl_band_tail_md_offset(band) % xfer_size == 0); + + chunk = CIRCLEQ_FIRST(&band->chunks); + for (i = 0; i < num_req % band->num_chunks; ++i) { + chunk = ftl_band_next_chunk(band, 
chunk); + } + + ppa.lbk = (num_req / band->num_chunks) * xfer_size; + ppa.chk = band->id; + ppa.pu = chunk->punit->start_ppa.pu; + ppa.grp = chunk->punit->start_ppa.grp; + + return ppa; +} + +struct ftl_ppa +ftl_band_head_md_ppa(struct ftl_band *band) +{ + struct ftl_ppa ppa; + + if (spdk_unlikely(!band->num_chunks)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + ppa = CIRCLEQ_FIRST(&band->chunks)->punit->start_ppa; + ppa.chk = band->id; + + return ppa; +} + +void +ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state) +{ + switch (state) { + case FTL_BAND_STATE_FREE: + _ftl_band_set_free(band); + break; + + case FTL_BAND_STATE_OPENING: + _ftl_band_set_opening(band); + break; + + case FTL_BAND_STATE_CLOSED: + _ftl_band_set_closed(band); + break; + + default: + break; + } + + band->state = state; +} + +void +ftl_band_set_addr(struct ftl_band *band, uint64_t lba, struct ftl_ppa ppa) +{ + struct ftl_md *md = &band->md; + uint64_t offset; + + assert(lba != FTL_LBA_INVALID); + + offset = ftl_band_lbkoff_from_ppa(band, ppa); + pthread_spin_lock(&band->md.lock); + + md->num_vld++; + md->lba_map[offset] = lba; + spdk_bit_array_set(md->vld_map, offset); + + pthread_spin_unlock(&band->md.lock); +} + +size_t +ftl_band_age(const struct ftl_band *band) +{ + return (size_t)(band->dev->seq - band->md.seq); +} + +size_t +ftl_band_num_usable_lbks(const struct ftl_band *band) +{ + return band->num_chunks * ftl_dev_lbks_in_chunk(band->dev); +} + +size_t +ftl_band_user_lbks(const struct ftl_band *band) +{ + return ftl_band_num_usable_lbks(band) - + ftl_head_md_num_lbks(band->dev) - + ftl_tail_md_num_lbks(band->dev); +} + +struct ftl_band * +ftl_band_from_ppa(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + assert(ppa.chk < ftl_dev_num_bands(dev)); + return &dev->bands[ppa.chk]; +} + +struct ftl_chunk * +ftl_band_chunk_from_ppa(struct ftl_band *band, struct ftl_ppa ppa) +{ + struct spdk_ftl_dev *dev = band->dev; + unsigned int punit; + + punit = ftl_ppa_flatten_punit(dev, ppa); + assert(punit < ftl_dev_num_punits(dev)); + + return &band->chunk_buf[punit]; +} + +uint64_t +ftl_band_lbkoff_from_ppa(struct ftl_band *band, struct ftl_ppa ppa) +{ + struct spdk_ftl_dev *dev = band->dev; + unsigned int punit; + + punit = ftl_ppa_flatten_punit(dev, ppa); + assert(ppa.chk == band->id); + + return punit * ftl_dev_lbks_in_chunk(dev) + ppa.lbk; +} + +struct ftl_ppa +ftl_band_next_xfer_ppa(struct ftl_band *band, struct ftl_ppa ppa, size_t num_lbks) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_chunk *chunk; + unsigned int punit_num; + size_t num_xfers, num_stripes; + + assert(ppa.chk == band->id); + + punit_num = ftl_ppa_flatten_punit(dev, ppa); + chunk = &band->chunk_buf[punit_num]; + + num_lbks += (ppa.lbk % dev->xfer_size); + ppa.lbk -= (ppa.lbk % dev->xfer_size); + +#if defined(DEBUG) + /* Check that the number of chunks has not been changed */ + struct ftl_chunk *_chunk; + size_t _num_chunks = 0; + CIRCLEQ_FOREACH(_chunk, &band->chunks, circleq) { + if (spdk_likely(_chunk->state != FTL_CHUNK_STATE_BAD)) { + _num_chunks++; + } + } + assert(band->num_chunks == _num_chunks); +#endif + num_stripes = (num_lbks / dev->xfer_size) / band->num_chunks; + ppa.lbk += num_stripes * dev->xfer_size; + num_lbks -= num_stripes * dev->xfer_size * band->num_chunks; + + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + + num_xfers = num_lbks / dev->xfer_size; + for (size_t i = 0; i < num_xfers; ++i) { + /* When the last chunk is reached the lbk part of the address */ + /* 
needs to be increased by xfer_size */ + if (ftl_band_chunk_is_last(band, chunk)) { + ppa.lbk += dev->xfer_size; + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + } + + chunk = ftl_band_next_operational_chunk(band, chunk); + ppa.grp = chunk->start_ppa.grp; + ppa.pu = chunk->start_ppa.pu; + + num_lbks -= dev->xfer_size; + } + + if (num_lbks) { + ppa.lbk += num_lbks; + if (ppa.lbk > ftl_dev_lbks_in_chunk(dev)) { + return ftl_to_ppa(FTL_PPA_INVALID); + } + } + + return ppa; +} + +struct ftl_ppa +ftl_band_ppa_from_lbkoff(struct ftl_band *band, uint64_t lbkoff) +{ + struct ftl_ppa ppa = { .ppa = 0 }; + struct spdk_ftl_dev *dev = band->dev; + uint64_t punit; + + punit = lbkoff / ftl_dev_lbks_in_chunk(dev) + dev->range.begin; + + ppa.lbk = lbkoff % ftl_dev_lbks_in_chunk(dev); + ppa.chk = band->id; + ppa.pu = punit / dev->geo.num_grp; + ppa.grp = punit % dev->geo.num_grp; + + return ppa; +} + +struct ftl_ppa +ftl_band_next_ppa(struct ftl_band *band, struct ftl_ppa ppa, size_t offset) +{ + uint64_t lbkoff = ftl_band_lbkoff_from_ppa(band, ppa); + return ftl_band_ppa_from_lbkoff(band, lbkoff + offset); +} + +void +ftl_band_acquire_md(struct ftl_band *band) +{ + assert(band->md.lba_map != NULL); + band->md.ref_cnt++; +} + +int +ftl_band_alloc_md(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + + assert(md->ref_cnt == 0); + assert(md->lba_map == NULL); + + md->lba_map = spdk_mempool_get(dev->lba_pool); + if (!md->lba_map) { + return -1; + } + + ftl_band_acquire_md(band); + return 0; +} + +void +ftl_band_release_md(struct ftl_band *band) +{ + struct ftl_md *md = &band->md; + + assert(band->md.lba_map != NULL); + assert(md->ref_cnt > 0); + md->ref_cnt--; + + if (md->ref_cnt == 0) { + ftl_band_free_md(band); + } +} + +static void +ftl_read_md_cb(void *arg, int status) +{ + struct ftl_md_io *md_io = arg; + + if (!status) { + status = md_io->pack_fn(md_io->io.dev, + md_io->md, + md_io->buf); + } else { + status = FTL_MD_IO_FAILURE; + } + + md_io->cb.fn(md_io->cb.ctx, status); +} + +static struct ftl_md_io * +ftl_io_init_md_read(struct spdk_ftl_dev *dev, struct ftl_md *md, void *data, struct ftl_ppa ppa, + struct ftl_band *band, size_t lbk_cnt, size_t req_size, ftl_md_pack_fn fn, + const struct ftl_cb *cb) +{ + struct ftl_md_io *io; + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(*io), + .flags = FTL_IO_MD | FTL_IO_PPA_MODE, + .type = FTL_IO_READ, + .iov_cnt = spdk_divide_round_up(lbk_cnt, req_size), + .req_size = req_size, + .fn = ftl_read_md_cb, + .data = data, + }; + + io = (struct ftl_md_io *)ftl_io_init_internal(&opts); + if (!io) { + return NULL; + } + + io->io.ppa = ppa; + io->md = md; + io->buf = data; + io->pack_fn = fn; + io->cb = *cb; + + return io; +} + +static struct ftl_io * +ftl_io_init_md_write(struct spdk_ftl_dev *dev, struct ftl_band *band, + void *data, size_t req_cnt, spdk_ftl_fn cb) +{ + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(struct ftl_io), + .flags = FTL_IO_MD | FTL_IO_PPA_MODE, + .type = FTL_IO_WRITE, + .iov_cnt = req_cnt, + .req_size = dev->xfer_size, + .fn = cb, + .data = data, + .md = NULL, + }; + + return ftl_io_init_internal(&opts); +} + +static int +ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt, + ftl_md_pack_fn md_fn, spdk_ftl_fn cb) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_io *io; + + io = 
ftl_io_init_md_write(dev, band, data, + spdk_divide_round_up(lbk_cnt, dev->xfer_size), cb); + if (!io) { + return -ENOMEM; + } + + md_fn(dev, &band->md, data); + + return ftl_io_write(io); +} + +void +ftl_band_md_clear(struct ftl_md *md) +{ + md->seq = 0; + md->num_vld = 0; + md->wr_cnt = 0; + md->lba_map = NULL; +} + +int +ftl_band_write_head_md(struct ftl_band *band, void *data, spdk_ftl_fn cb) +{ + return ftl_band_write_md(band, data, ftl_head_md_num_lbks(band->dev), + ftl_pack_head_md, cb); +} + +int +ftl_band_write_tail_md(struct ftl_band *band, void *data, spdk_ftl_fn cb) +{ + return ftl_band_write_md(band, data, ftl_tail_md_num_lbks(band->dev), + ftl_pack_tail_md, cb); +} + +static struct ftl_ppa +ftl_band_lba_map_ppa(struct ftl_band *band) +{ + return ftl_band_next_xfer_ppa(band, band->tail_md_ppa, + ftl_tail_md_hdr_num_lbks() + + ftl_vld_map_num_lbks(band->dev)); +} + +static int +ftl_band_read_md(struct ftl_band *band, struct ftl_md *md, void *data, size_t lbk_cnt, + size_t req_size, struct ftl_ppa start_ppa, ftl_md_pack_fn unpack_fn, + const struct ftl_cb *cb) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md_io *io; + + if (spdk_unlikely(!band->num_chunks)) { + return -ENOENT; + } + + io = ftl_io_init_md_read(dev, md, data, start_ppa, band, lbk_cnt, + req_size, unpack_fn, cb); + if (!io) { + return -ENOMEM; + } + + return ftl_io_read((struct ftl_io *)io); +} + +int +ftl_band_read_tail_md(struct ftl_band *band, struct ftl_md *md, + void *data, struct ftl_ppa ppa, const struct ftl_cb *cb) +{ + return ftl_band_read_md(band, md, data, + ftl_tail_md_num_lbks(band->dev), + band->dev->xfer_size, + ppa, + ftl_unpack_tail_md, + cb); +} + +int +ftl_band_read_lba_map(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb) +{ + /* TODO: change this interface to allow reading parts of the LBA map instead of */ + /* reading whole metadata */ + return ftl_band_read_md(band, md, data, + ftl_lba_map_num_lbks(band->dev), + band->dev->xfer_size, + ftl_band_lba_map_ppa(band), + ftl_unpack_lba_map, + cb); +} + +int +ftl_band_read_head_md(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb) +{ + return ftl_band_read_md(band, md, data, + ftl_head_md_num_lbks(band->dev), + band->dev->xfer_size, + ftl_band_head_md_ppa(band), + ftl_unpack_head_md, + cb); +} + +static void +ftl_band_remove_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + CIRCLEQ_REMOVE(&band->chunks, chunk, circleq); + band->num_chunks--; +} + +static void +ftl_erase_fail(struct ftl_io *io, int status) +{ + struct ftl_chunk *chunk; + char buf[128]; + + SPDK_ERRLOG("Erase failed @ppa: %s, status: %d\n", + ftl_ppa2str(io->ppa, buf, sizeof(buf)), status); + + chunk = ftl_band_chunk_from_ppa(io->band, io->ppa); + chunk->state = FTL_CHUNK_STATE_BAD; + ftl_band_remove_chunk(io->band, chunk); +} + +static void +ftl_band_erase_cb(void *ctx, int status) +{ + struct ftl_io *io = ctx; + struct ftl_chunk *chunk; + + if (spdk_unlikely(status)) { + ftl_erase_fail(io, status); + return; + } + chunk = ftl_band_chunk_from_ppa(io->band, io->ppa); + chunk->state = FTL_CHUNK_STATE_FREE; +} + +int +ftl_band_erase(struct ftl_band *band) +{ + struct ftl_chunk *chunk; + struct ftl_io *io; + int rc = 0; + + assert(band->state == FTL_BAND_STATE_CLOSED || + band->state == FTL_BAND_STATE_FREE); + + ftl_band_set_state(band, FTL_BAND_STATE_PREP); + + CIRCLEQ_FOREACH(chunk, &band->chunks, circleq) { + if (chunk->state == FTL_CHUNK_STATE_FREE) { + continue; + } + + io = ftl_io_erase_init(band, 1, 
ftl_band_erase_cb); + if (!io) { + rc = -ENOMEM; + break; + } + + io->ppa = chunk->start_ppa; + rc = ftl_io_erase(io); + if (rc) { + assert(0); + /* TODO: change band's state back to close? */ + break; + } + } + + return rc; +} + +int +ftl_band_write_prep(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + + if (ftl_band_alloc_md(band)) { + return -1; + } + + band->md.seq = ++dev->seq; + return 0; +} + +struct ftl_chunk * +ftl_band_next_operational_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + struct ftl_chunk *result = NULL; + struct ftl_chunk *entry; + + if (spdk_unlikely(!band->num_chunks)) { + return NULL; + } + + /* Erasing band may fail after it was assigned to wptr. */ + /* In such a case chunk is no longer in band->chunks queue. */ + if (spdk_likely(chunk->state != FTL_CHUNK_STATE_BAD)) { + result = ftl_band_next_chunk(band, chunk); + } else { + CIRCLEQ_FOREACH_REVERSE(entry, &band->chunks, circleq) { + if (entry->pos > chunk->pos) { + result = entry; + } else { + if (!result) { + result = CIRCLEQ_FIRST(&band->chunks); + } + break; + } + } + } + + return result; +} diff --git a/lib/ftl/ftl_band.h b/lib/ftl/ftl_band.h new file mode 100644 index 000000000..4a9788511 --- /dev/null +++ b/lib/ftl/ftl_band.h @@ -0,0 +1,253 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef FTL_BAND_H +#define FTL_BAND_H + +#include "spdk/stdinc.h" +#include "spdk/bit_array.h" +#include "spdk/queue.h" + +#include "ftl_ppa.h" + +struct spdk_ftl_dev; +struct ftl_cb; + +enum ftl_chunk_state { + FTL_CHUNK_STATE_FREE, + FTL_CHUNK_STATE_OPEN, + FTL_CHUNK_STATE_CLOSED, + FTL_CHUNK_STATE_BAD, + FTL_CHUNK_STATE_VACANT, +}; + +struct ftl_chunk { + /* Block state */ + enum ftl_chunk_state state; + + /* First PPA */ + struct ftl_ppa start_ppa; + + /* Pointer to parallel unit */ + struct ftl_punit *punit; + + /* Position in band's chunk_buf */ + uint32_t pos; + + CIRCLEQ_ENTRY(ftl_chunk) circleq; +}; + +enum ftl_md_status { + FTL_MD_SUCCESS, + /* Metadata read failure */ + FTL_MD_IO_FAILURE, + /* Invalid version */ + FTL_MD_INVALID_VER, + /* UUID doesn't match */ + FTL_MD_NO_MD, + /* UUID and version matches but CRC doesn't */ + FTL_MD_INVALID_CRC, + /* Vld or lba map size doesn't match */ + FTL_MD_INVALID_SIZE +}; + +struct ftl_md { + /* Sequence number */ + uint64_t seq; + + /* Number of defrag cycles */ + uint64_t wr_cnt; + + /* LBA/vld map lock */ + pthread_spinlock_t lock; + + /* Number of valid LBAs */ + size_t num_vld; + + /* LBA map's reference count */ + size_t ref_cnt; + + /* Bitmap of valid LBAs */ + struct spdk_bit_array *vld_map; + + /* LBA map (only valid for open bands) */ + uint64_t *lba_map; +}; + +enum ftl_band_state { + FTL_BAND_STATE_FREE, + FTL_BAND_STATE_PREP, + FTL_BAND_STATE_OPENING, + FTL_BAND_STATE_OPEN, + FTL_BAND_STATE_FULL, + FTL_BAND_STATE_CLOSING, + FTL_BAND_STATE_CLOSED, + FTL_BAND_STATE_MAX +}; + +struct ftl_band { + /* Device this band belongs to */ + struct spdk_ftl_dev *dev; + + /* Number of operational chunks */ + size_t num_chunks; + + /* Array of chunks */ + struct ftl_chunk *chunk_buf; + + /* List of operational chunks */ + CIRCLEQ_HEAD(, ftl_chunk) chunks; + + /* Band's metadata */ + struct ftl_md md; + + /* Band's state */ + enum ftl_band_state state; + + /* Band's index */ + unsigned int id; + + /* Latest merit calculation */ + double merit; + + /* High defrag priority - means that the metadata should be copied and */ + /* the band should be defragged immediately */ + int high_prio; + + /* End metadata start ppa */ + struct ftl_ppa tail_md_ppa; + + /* Free/shut bands' lists */ + LIST_ENTRY(ftl_band) list_entry; + + /* High priority queue link */ + STAILQ_ENTRY(ftl_band) prio_stailq; +}; + +uint64_t ftl_band_lbkoff_from_ppa(struct ftl_band *band, struct ftl_ppa ppa); +struct ftl_ppa ftl_band_ppa_from_lbkoff(struct ftl_band *band, uint64_t lbkoff); +void ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state); +size_t ftl_band_age(const struct ftl_band *band); +void ftl_band_acquire_md(struct ftl_band *band); +int ftl_band_alloc_md(struct ftl_band *band); +void ftl_band_release_md(struct ftl_band *band); +struct ftl_ppa ftl_band_next_xfer_ppa(struct ftl_band *band, struct ftl_ppa ppa, + size_t num_lbks); +struct ftl_ppa ftl_band_next_ppa(struct ftl_band *band, struct ftl_ppa ppa, + size_t offset); +size_t ftl_band_num_usable_lbks(const struct ftl_band *band); +size_t ftl_band_user_lbks(const struct ftl_band *band); +void ftl_band_set_addr(struct ftl_band *band, uint64_t lba, + struct ftl_ppa ppa); +struct ftl_band *ftl_band_from_ppa(struct spdk_ftl_dev *dev, struct ftl_ppa ppa); +struct ftl_chunk *ftl_band_chunk_from_ppa(struct ftl_band *band, struct ftl_ppa); +void ftl_band_md_clear(struct ftl_md *md); +int ftl_band_read_tail_md(struct ftl_band *band, struct ftl_md *md, + void *data, struct ftl_ppa, + const struct 
ftl_cb *cb); +int ftl_band_read_head_md(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb); +int ftl_band_read_lba_map(struct ftl_band *band, struct ftl_md *md, + void *data, const struct ftl_cb *cb); +int ftl_band_write_tail_md(struct ftl_band *band, void *data, spdk_ftl_fn cb); +int ftl_band_write_head_md(struct ftl_band *band, void *data, spdk_ftl_fn cb); +struct ftl_ppa ftl_band_tail_md_ppa(struct ftl_band *band); +struct ftl_ppa ftl_band_head_md_ppa(struct ftl_band *band); +void ftl_band_write_failed(struct ftl_band *band); +void ftl_band_clear_md(struct ftl_band *band); +int ftl_band_full(struct ftl_band *band, size_t offset); +int ftl_band_erase(struct ftl_band *band); +int ftl_band_write_prep(struct ftl_band *band); +struct ftl_chunk *ftl_band_next_operational_chunk(struct ftl_band *band, + struct ftl_chunk *chunk); + +static inline int +ftl_band_empty(const struct ftl_band *band) +{ + return band->md.num_vld == 0; +} + +static inline struct ftl_chunk * +ftl_band_next_chunk(struct ftl_band *band, struct ftl_chunk *chunk) +{ + assert(chunk->state != FTL_CHUNK_STATE_BAD); + return CIRCLEQ_LOOP_NEXT(&band->chunks, chunk, circleq); +} + +static inline void +ftl_band_set_next_state(struct ftl_band *band) +{ + ftl_band_set_state(band, (band->state + 1) % FTL_BAND_STATE_MAX); +} + +static inline int +ftl_band_state_changing(struct ftl_band *band) +{ + return band->state == FTL_BAND_STATE_OPENING || + band->state == FTL_BAND_STATE_CLOSING; +} + +static inline int +ftl_band_lbkoff_valid(struct ftl_band *band, size_t lbkoff) +{ + struct ftl_md *md = &band->md; + + pthread_spin_lock(&md->lock); + if (spdk_bit_array_get(md->vld_map, lbkoff)) { + pthread_spin_unlock(&md->lock); + return 1; + } + + pthread_spin_unlock(&md->lock); + return 0; +} + +static inline int +ftl_band_chunk_is_last(struct ftl_band *band, struct ftl_chunk *chunk) +{ + return chunk == CIRCLEQ_LAST(&band->chunks); +} + +static inline int +ftl_band_chunk_is_first(struct ftl_band *band, struct ftl_chunk *chunk) +{ + return chunk == CIRCLEQ_FIRST(&band->chunks); +} + +static inline int +ftl_chunk_is_writable(const struct ftl_chunk *chunk) +{ + return chunk->state == FTL_CHUNK_STATE_OPEN || chunk->state == FTL_CHUNK_STATE_FREE; +} + +#endif /* FTL_BAND_H */ diff --git a/lib/ftl/ftl_core.c b/lib/ftl/ftl_core.c new file mode 100644 index 000000000..73bf1e0ff --- /dev/null +++ b/lib/ftl/ftl_core.c @@ -0,0 +1,1470 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/likely.h" +#include "spdk/stdinc.h" +#include "spdk/nvme.h" +#include "spdk/io_channel.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk/ftl.h" + +#include "ftl_core.h" +#include "ftl_band.h" +#include "ftl_io.h" +#include "ftl_rwb.h" +#include "ftl_debug.h" + +/* Max number of iovecs */ +#define FTL_MAX_IOV 1024 + +struct ftl_wptr { + /* Owner device */ + struct spdk_ftl_dev *dev; + + /* Current PPA */ + struct ftl_ppa ppa; + + /* Band currently being written to */ + struct ftl_band *band; + + /* Current logical block's offset */ + uint64_t offset; + + /* Current erase block */ + struct ftl_chunk *chunk; + + /* Metadata DMA buffer */ + void *md_buf; + + /* List link */ + LIST_ENTRY(ftl_wptr) list_entry; +}; + +struct ftl_flush { + /* Owner device */ + struct spdk_ftl_dev *dev; + + /* Number of batches to wait for */ + size_t num_req; + + /* Callback */ + struct ftl_cb cb; + + /* Batch bitmap */ + struct spdk_bit_array *bmap; + + /* List link */ + LIST_ENTRY(ftl_flush) list_entry; +}; + +typedef int (*ftl_next_ppa_fn)(struct ftl_io *, struct ftl_ppa *, size_t, void *); +static void _ftl_read(void *); +static void _ftl_write(void *); + +static int +ftl_rwb_flags_from_io(const struct ftl_io *io) +{ + int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD; + return io->flags & valid_flags; +} + +static int +ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry) +{ + return entry->flags & FTL_IO_WEAK; +} + +static void +ftl_wptr_free(struct ftl_wptr *wptr) +{ + if (!wptr) { + return; + } + + spdk_dma_free(wptr->md_buf); + free(wptr); +} + +static void +ftl_remove_wptr(struct ftl_wptr *wptr) +{ + LIST_REMOVE(wptr, list_entry); + ftl_wptr_free(wptr); +} + +static void +ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status) +{ + struct ftl_io *io = arg; + + if (spdk_nvme_cpl_is_error(status)) { + ftl_io_process_error(io, status); + } + + ftl_trace(completion, ftl_dev_trace(io->dev), io, FTL_TRACE_COMPLETION_DISK); + + if (!ftl_io_dec_req(io)) { + ftl_io_complete(io); + } +} + +static void +ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band) +{ + struct ftl_wptr *wptr = NULL; + + LIST_FOREACH(wptr, &dev->wptr_list, list_entry) { + if (wptr->band == band) { + break; + } + } + + /* If the band already has the high_prio flag set, other writes must */ + /* have failed earlier, so it's already taken care of. 
*/ + if (band->high_prio) { + assert(wptr == NULL); + return; + } + + ftl_band_write_failed(band); + ftl_remove_wptr(wptr); +} + +static struct ftl_wptr * +ftl_wptr_from_band(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_wptr *wptr = NULL; + + LIST_FOREACH(wptr, &dev->wptr_list, list_entry) { + if (wptr->band == band) { + return wptr; + } + } + + return NULL; +} + +static void +ftl_md_write_fail(struct ftl_io *io, int status) +{ + struct ftl_band *band = io->band; + struct ftl_wptr *wptr; + char buf[128]; + + wptr = ftl_wptr_from_band(band); + + SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n", + ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status); + + ftl_halt_writes(io->dev, band); +} + +static void +ftl_md_write_cb(void *arg, int status) +{ + struct ftl_io *io = arg; + struct ftl_wptr *wptr; + + wptr = ftl_wptr_from_band(io->band); + + if (status) { + ftl_md_write_fail(io, status); + return; + } + + ftl_band_set_next_state(io->band); + if (io->band->state == FTL_BAND_STATE_CLOSED) { + ftl_remove_wptr(wptr); + } +} + +static int +ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa, + size_t lbk, void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + size_t lbk_cnt, max_lbks; + + assert(ftl_io_mode_ppa(io)); + assert(io->iov_pos < io->iov_cnt); + + if (lbk == 0) { + *ppa = io->ppa; + } else { + *ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, lbk); + } + + assert(!ftl_ppa_invalid(*ppa)); + + /* Metadata has to be read in the way it's written (jumping across */ + /* the chunks in xfer_size increments) */ + if (io->flags & FTL_IO_MD) { + max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size); + lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks); + assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size); + } else { + lbk_cnt = ftl_io_iovec_len_left(io); + } + + return lbk_cnt; +} + +static int +ftl_wptr_close_band(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + ftl_band_set_state(band, FTL_BAND_STATE_CLOSING); + band->tail_md_ppa = wptr->ppa; + + return ftl_band_write_tail_md(band, wptr->md_buf, ftl_md_write_cb); +} + +static int +ftl_wptr_open_band(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + assert(ftl_band_chunk_is_first(band, wptr->chunk)); + assert(band->md.num_vld == 0); + + ftl_band_clear_md(band); + + assert(band->state == FTL_BAND_STATE_PREP); + ftl_band_set_state(band, FTL_BAND_STATE_OPENING); + + return ftl_band_write_head_md(band, wptr->md_buf, ftl_md_write_cb); +} + +static int +ftl_submit_erase(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_band *band = io->band; + struct ftl_ppa ppa = io->ppa; + struct ftl_chunk *chunk; + uint64_t ppa_packed; + int rc = 0; + size_t i; + + for (i = 0; i < io->lbk_cnt; ++i) { + if (i != 0) { + chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa)); + assert(chunk->state == FTL_CHUNK_STATE_CLOSED || + chunk->state == FTL_CHUNK_STATE_VACANT); + ppa = chunk->start_ppa; + } + + assert(ppa.lbk == 0); + ppa_packed = ftl_ppa_addr_pack(dev, ppa); + + ftl_io_inc_req(io); + + ftl_trace(submission, ftl_dev_trace(dev), io, ppa, 1); + rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev), + &ppa_packed, 1, NULL, ftl_io_cmpl_cb, io); + if (rc) { + SPDK_ERRLOG("Vector reset failed with status: %d\n", rc); + ftl_io_dec_req(io); + break; + } + + } + + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static void +_ftl_io_erase(void *ctx) +{ + 
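+	/* Thread-message callback: resubmits the erase once we are running on
+	 * the core thread (see ftl_io_erase() below). */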
ftl_io_erase((struct ftl_io *)ctx); +} + +static bool +ftl_check_core_thread(const struct spdk_ftl_dev *dev) +{ + return dev->core_thread.tid == pthread_self(); +} + +static bool +ftl_check_read_thread(const struct spdk_ftl_dev *dev) +{ + return dev->read_thread.tid == pthread_self(); +} + +int +ftl_io_erase(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + + if (ftl_check_core_thread(dev)) { + return ftl_submit_erase(io); + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io); + return 0; +} + +static struct ftl_band * +ftl_next_write_band(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + + band = LIST_FIRST(&dev->free_bands); + if (!band) { + return NULL; + } + assert(band->state == FTL_BAND_STATE_FREE); + + if (ftl_band_erase(band)) { + /* TODO: handle erase failure */ + return NULL; + } + + return band; +} + +static struct ftl_band * +ftl_next_wptr_band(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + + if (!dev->next_band) { + band = ftl_next_write_band(dev); + } else { + assert(dev->next_band->state == FTL_BAND_STATE_PREP); + band = dev->next_band; + dev->next_band = NULL; + } + + return band; +} + +static struct ftl_wptr * +ftl_wptr_init(struct ftl_band *band) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_wptr *wptr; + + wptr = calloc(1, sizeof(*wptr)); + if (!wptr) { + return NULL; + } + + wptr->md_buf = spdk_dma_zmalloc(ftl_tail_md_num_lbks(dev) * FTL_BLOCK_SIZE, + FTL_BLOCK_SIZE, NULL); + if (!wptr->md_buf) { + ftl_wptr_free(wptr); + return NULL; + } + + wptr->dev = dev; + wptr->band = band; + wptr->chunk = CIRCLEQ_FIRST(&band->chunks); + wptr->ppa = wptr->chunk->start_ppa; + + return wptr; +} + +static int +ftl_add_wptr(struct spdk_ftl_dev *dev) +{ + struct ftl_band *band; + struct ftl_wptr *wptr; + + band = ftl_next_wptr_band(dev); + if (!band) { + return -1; + } + + wptr = ftl_wptr_init(band); + if (!wptr) { + return -1; + } + + if (ftl_band_write_prep(band)) { + ftl_wptr_free(wptr); + return -1; + } + + LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry); + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id); + ftl_trace(write_band, ftl_dev_trace(dev), band); + return 0; +} + +static void +ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size) +{ + struct ftl_band *band = wptr->band; + struct spdk_ftl_dev *dev = wptr->dev; + struct spdk_ftl_conf *conf = &dev->conf; + size_t next_thld; + + wptr->offset += xfer_size; + next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100; + + if (ftl_band_full(band, wptr->offset)) { + ftl_band_set_state(band, FTL_BAND_STATE_FULL); + } + + wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size); + wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk); + + assert(!ftl_ppa_invalid(wptr->ppa)); + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n", + wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk); + + if (wptr->offset >= next_thld && !dev->next_band) { + dev->next_band = ftl_next_write_band(dev); + } +} + +static int +ftl_wptr_ready(struct ftl_wptr *wptr) +{ + struct ftl_band *band = wptr->band; + + /* TODO: add handling of empty bands */ + + if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) { + /* Erasing band may fail after it was assigned to wptr. */ + if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) { + ftl_wptr_advance(wptr, wptr->dev->xfer_size); + } + return 0; + } + + /* If we're in the process of writing metadata, wait till it is */ + /* completed. 
*/ + /* TODO: we should probably change bands once we're writing tail md */ + if (ftl_band_state_changing(band)) { + return 0; + } + + if (band->state == FTL_BAND_STATE_FULL) { + if (ftl_wptr_close_band(wptr)) { + /* TODO: need recovery here */ + assert(false); + } + return 0; + } + + if (band->state != FTL_BAND_STATE_OPEN) { + if (ftl_wptr_open_band(wptr)) { + /* TODO: need recovery here */ + assert(false); + } + return 0; + } + + return 1; +} + +static const struct spdk_ftl_limit * +ftl_get_limit(const struct spdk_ftl_dev *dev, int type) +{ + assert(type < SPDK_FTL_LIMIT_MAX); + return &dev->conf.defrag.limits[type]; +} + +static int +ftl_update_md_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry) +{ + struct ftl_ppa ppa; + + /* If the LBA is invalid don't bother checking the md and l2p */ + if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) { + return 1; + } + + ppa = ftl_l2p_get(dev, entry->lba); + if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) { + ftl_invalidate_addr(dev, entry->ppa); + return 1; + } + + return 0; +} + +static void +ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry) +{ + pthread_spin_lock(&entry->lock); + + if (!ftl_rwb_entry_valid(entry)) { + goto unlock; + } + + /* Make sure the metadata is in sync with l2p. If the l2p still contains */ + /* the entry, fill it with the on-disk PPA and clear the cache status */ + /* bit. Otherwise, skip the l2p update and just clear the cache status. */ + /* This can happen, when a write comes during the time that l2p contains */ + /* the entry, but the entry doesn't have a PPA assigned (and therefore */ + /* does not have the cache bit set). */ + if (ftl_update_md_entry(dev, entry)) { + goto clear; + } + + ftl_l2p_set(dev, entry->lba, entry->ppa); +clear: + ftl_rwb_entry_invalidate(entry); +unlock: + pthread_spin_unlock(&entry->lock); +} + +static struct ftl_rwb_entry * +ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags) +{ + struct ftl_rwb_entry *entry; + + entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags)); + if (!entry) { + return NULL; + } + + ftl_evict_cache_entry(dev, entry); + + entry->flags = flags; + return entry; +} + +static void +ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size) +{ + struct ftl_rwb_entry *entry; + int flags = FTL_IO_PAD | FTL_IO_INTERNAL; + + for (size_t i = 0; i < size; ++i) { + entry = ftl_acquire_entry(dev, flags); + if (!entry) { + break; + } + + entry->lba = FTL_LBA_INVALID; + entry->ppa = ftl_to_ppa(FTL_PPA_INVALID); + memset(entry->data, 0, FTL_BLOCK_SIZE); + ftl_rwb_push(entry); + } +} + +static void +ftl_remove_free_bands(struct spdk_ftl_dev *dev) +{ + while (!LIST_EMPTY(&dev->free_bands)) { + LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry); + } + + dev->next_band = NULL; +} + +static void +ftl_process_shutdown(struct spdk_ftl_dev *dev) +{ + size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) + + ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER); + + if (size >= dev->xfer_size) { + return; + } + + /* If we reach this point we need to remove free bands */ + /* and pad current wptr band to the end */ + ftl_remove_free_bands(dev); + + /* Pad write buffer until band is full */ + ftl_rwb_pad(dev, dev->xfer_size - size); +} + +static int +ftl_shutdown_complete(struct spdk_ftl_dev *dev) +{ + return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) && + LIST_EMPTY(&dev->wptr_list); +} + +void +ftl_apply_limits(struct spdk_ftl_dev *dev) +{ + const struct spdk_ftl_limit *limit; + struct ftl_stats *stats = 
&dev->stats; + size_t rwb_limit[FTL_RWB_TYPE_MAX]; + int i; + + ftl_rwb_get_limits(dev->rwb, rwb_limit); + + /* Clear existing limit */ + dev->limit = SPDK_FTL_LIMIT_MAX; + + for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) { + limit = ftl_get_limit(dev, i); + + if (dev->num_free <= limit->thld) { + rwb_limit[FTL_RWB_TYPE_USER] = + (limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100; + stats->limits[i]++; + dev->limit = i; + goto apply; + } + } + + /* Clear the limits, since we don't need to apply them anymore */ + rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb); +apply: + ftl_trace(limits, ftl_dev_trace(dev), rwb_limit, dev->num_free); + ftl_rwb_set_limits(dev->rwb, rwb_limit); +} + +static int +ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + struct ftl_band *band = ftl_band_from_ppa(dev, ppa); + struct ftl_md *md = &band->md; + uint64_t offset; + + offset = ftl_band_lbkoff_from_ppa(band, ppa); + + /* The bit might be already cleared if two writes are scheduled to the */ + /* same LBA at the same time */ + if (spdk_bit_array_get(md->vld_map, offset)) { + assert(md->num_vld > 0); + spdk_bit_array_clear(md->vld_map, offset); + md->num_vld--; + return 1; + } + + return 0; +} + +int +ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa) +{ + struct ftl_band *band; + int rc; + + assert(!ftl_ppa_cached(ppa)); + band = ftl_band_from_ppa(dev, ppa); + + pthread_spin_lock(&band->md.lock); + rc = ftl_invalidate_addr_unlocked(dev, ppa); + pthread_spin_unlock(&band->md.lock); + + return rc; +} + +static int +ftl_read_retry(int rc) +{ + return rc == -EAGAIN; +} + +static int +ftl_read_canceled(int rc) +{ + return rc == 0; +} + +static int +ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa, + void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_ppa ppa; + size_t lbk = 0; + int rc = 0, lbk_cnt; + + while (lbk < io->lbk_cnt) { + /* We might hit the cache here, if so, skip the read */ + lbk_cnt = rc = next_ppa(io, &ppa, lbk, ctx); + + /* We might need to retry the read from scratch (e.g. 
*/ + /* because write was under way and completed before */ + /* we could read it from rwb */ + if (ftl_read_retry(rc)) { + continue; + } + + /* We don't have to schedule the read, as it was read from cache */ + if (ftl_read_canceled(rc)) { + ftl_io_update_iovec(io, 1); + lbk++; + continue; + } + + assert(lbk_cnt > 0); + + ftl_trace(submission, ftl_dev_trace(dev), io, ppa, lbk_cnt); + rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev), + ftl_io_iovec_addr(io), + ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt, + ftl_io_cmpl_cb, io, 0); + if (rc) { + SPDK_ERRLOG("spdk_nvme_ns_cmd_read failed with status: %d\n", rc); + io->status = -EIO; + break; + } + + ftl_io_update_iovec(io, lbk_cnt); + ftl_io_inc_req(io); + lbk += lbk_cnt; + } + + /* If we didn't have to read anything from the device, */ + /* complete the request right away */ + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static int +ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba, + struct ftl_ppa ppa, void *buf) +{ + struct ftl_rwb *rwb = io->dev->rwb; + struct ftl_rwb_entry *entry; + struct ftl_ppa nppa; + int rc = 0; + + entry = ftl_rwb_entry_from_offset(rwb, ppa.offset); + pthread_spin_lock(&entry->lock); + + nppa = ftl_l2p_get(io->dev, lba); + if (ppa.ppa != nppa.ppa) { + rc = -1; + goto out; + } + + memcpy(buf, entry->data, FTL_BLOCK_SIZE); +out: + pthread_spin_unlock(&entry->lock); + return rc; +} + +static int +ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa, + size_t lbk, void *ctx) +{ + struct spdk_ftl_dev *dev = io->dev; + *ppa = ftl_l2p_get(dev, io->lba + lbk); + + (void) ctx; + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n", ppa->ppa, io->lba); + + /* If the PPA is invalid, skip it (the buffer should already be zero'ed) */ + if (ftl_ppa_invalid(*ppa)) { + ftl_trace(completion, ftl_dev_trace(io->dev), io, + FTL_TRACE_COMPLETION_INVALID); + return 0; + } + + if (ftl_ppa_cached(*ppa)) { + if (!ftl_ppa_cache_read(io, io->lba + lbk, *ppa, ftl_io_iovec_addr(io))) { + ftl_trace(completion, ftl_dev_trace(io->dev), io, + FTL_TRACE_COMPLETION_CACHE); + return 0; + } + + /* If the state changed, we have to re-read the l2p */ + return -EAGAIN; + } + + /* We want to read one lbk at a time */ + return 1; +} + +static void +ftl_complete_flush(struct ftl_flush *flush) +{ + assert(flush->num_req == 0); + LIST_REMOVE(flush, list_entry); + + flush->cb.fn(flush->cb.ctx, 0); + + spdk_bit_array_free(&flush->bmap); + free(flush); +} + +static void +ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch) +{ + struct ftl_flush *flush, *tflush; + size_t offset; + + LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) { + offset = ftl_rwb_batch_get_offset(batch); + + if (spdk_bit_array_get(flush->bmap, offset)) { + spdk_bit_array_set(flush->bmap, offset); + if (!(--flush->num_req)) { + ftl_complete_flush(flush); + } + } + } +} + +static void +ftl_write_fail(struct ftl_io *io, int status) +{ + struct ftl_rwb_batch *batch = io->rwb_batch; + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_entry *entry; + struct ftl_band *band; + char buf[128]; + + entry = ftl_rwb_batch_first_entry(batch); + + band = ftl_band_from_ppa(io->dev, entry->ppa); + SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n", + ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status); + + /* Close the band and, halt wptr and defrag */ + ftl_halt_writes(dev, band); + + ftl_rwb_foreach(entry, batch) { + /* Invalidate meta set by process_writes() */ + ftl_invalidate_addr(dev, entry->ppa); + } + + /* Reset 
the batch back to the the RWB to resend it later */ + ftl_rwb_batch_revert(batch); +} + +static void +ftl_write_cb(void *arg, int status) +{ + struct ftl_io *io = arg; + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_batch *batch = io->rwb_batch; + struct ftl_rwb_entry *entry; + + if (status) { + ftl_write_fail(io, status); + return; + } + + assert(io->lbk_cnt == dev->xfer_size); + ftl_rwb_foreach(entry, batch) { + if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) { + /* Verify that the LBA is set for user lbks */ + assert(entry->lba != FTL_LBA_INVALID); + } + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n", + entry->ppa.ppa, entry->lba); + + if (ftl_update_md_entry(dev, entry)) { + ftl_rwb_entry_invalidate(entry); + } + } + + ftl_process_flush(dev, batch); + ftl_rwb_batch_release(batch); +} + +static void +ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry) +{ + if (!ftl_rwb_entry_internal(entry)) { + dev->stats.write_user++; + } + dev->stats.write_total++; +} + +static void +ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry, + struct ftl_ppa ppa) +{ + struct ftl_ppa prev_ppa; + struct ftl_rwb_entry *prev; + struct ftl_band *band; + int valid; + + prev_ppa = ftl_l2p_get(dev, entry->lba); + if (ftl_ppa_invalid(prev_ppa)) { + ftl_l2p_set(dev, entry->lba, ppa); + return; + } + + /* If the L2P's PPA is different than what we expected we don't need to */ + /* do anything (someone's already overwritten our data). */ + if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) { + return; + } + + if (ftl_ppa_cached(prev_ppa)) { + assert(!ftl_rwb_entry_weak(entry)); + prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset); + pthread_spin_lock(&prev->lock); + + /* Re-read the L2P under the lock to protect against updates */ + /* to this LBA from other threads */ + prev_ppa = ftl_l2p_get(dev, entry->lba); + + /* If the entry is no longer in cache, another write has been */ + /* scheduled in the meantime, so we have to invalidate its LBA */ + if (!ftl_ppa_cached(prev_ppa)) { + ftl_invalidate_addr(dev, prev_ppa); + } + + /* If previous entry is part of cache, remove and invalidate it */ + if (ftl_rwb_entry_valid(prev)) { + ftl_invalidate_addr(dev, prev->ppa); + ftl_rwb_entry_invalidate(prev); + } + + ftl_l2p_set(dev, entry->lba, ppa); + pthread_spin_unlock(&prev->lock); + return; + } + + /* Lock the band containing previous PPA. This assures atomic changes to */ + /* the L2P as wall as metadata. The valid bits in metadata are used to */ + /* check weak writes validity. */ + band = ftl_band_from_ppa(dev, prev_ppa); + pthread_spin_lock(&band->md.lock); + + valid = ftl_invalidate_addr_unlocked(dev, prev_ppa); + + /* If the address has been invalidated already, we don't want to update */ + /* the L2P for weak writes, as it means the write is no longer valid. 
*/ + if (!ftl_rwb_entry_weak(entry) || valid) { + ftl_l2p_set(dev, entry->lba, ppa); + } + + pthread_spin_unlock(&band->md.lock); +} + +static int +ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct iovec *iov = ftl_io_iovec(io); + int rc = 0; + size_t i; + + for (i = 0; i < io->iov_cnt; ++i) { + assert(iov[i].iov_len > 0); + assert(iov[i].iov_len / PAGE_SIZE == dev->xfer_size); + + ftl_trace(submission, ftl_dev_trace(dev), io, wptr->ppa, + iov[i].iov_len / PAGE_SIZE); + rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev), + iov[i].iov_base, ftl_io_get_md(io), + ftl_ppa_addr_pack(dev, wptr->ppa), + iov[i].iov_len / PAGE_SIZE, + ftl_io_cmpl_cb, io, 0, 0, 0); + if (rc) { + SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n", + rc, wptr->ppa.ppa); + io->status = -EIO; + break; + } + + io->pos = iov[i].iov_len / PAGE_SIZE; + ftl_io_inc_req(io); + ftl_wptr_advance(wptr, iov[i].iov_len / PAGE_SIZE); + } + + if (ftl_io_done(io)) { + ftl_io_complete(io); + } + + return rc; +} + +static void +ftl_flush_pad_batch(struct spdk_ftl_dev *dev) +{ + struct ftl_rwb *rwb = dev->rwb; + size_t size; + + size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) + + ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER); + + /* There must be something in the RWB, otherwise the flush */ + /* wouldn't be waiting for anything */ + assert(size > 0); + + /* Only add padding when there's less than xfer size */ + /* entries in the buffer. Otherwise we just have to wait */ + /* for the entries to become ready. */ + if (size < dev->xfer_size) { + ftl_rwb_pad(dev, dev->xfer_size - (size % dev->xfer_size)); + } +} + +static int +ftl_wptr_process_writes(struct ftl_wptr *wptr) +{ + struct spdk_ftl_dev *dev = wptr->dev; + struct ftl_rwb_batch *batch; + struct ftl_rwb_entry *entry; + struct ftl_io *io; + struct ftl_ppa ppa; + + /* Make sure the band is prepared for writing */ + if (!ftl_wptr_ready(wptr)) { + return 0; + } + + if (dev->halt) { + ftl_process_shutdown(dev); + } + + batch = ftl_rwb_pop(dev->rwb); + if (!batch) { + /* If there are queued flush requests we need to pad the RWB to */ + /* force out remaining entries */ + if (!LIST_EMPTY(&dev->flush_list)) { + ftl_flush_pad_batch(dev); + } + + return 0; + } + + io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb); + if (!io) { + goto error; + } + + ppa = wptr->ppa; + ftl_rwb_foreach(entry, batch) { + entry->ppa = ppa; + /* Setting entry's cache bit needs to be done after metadata */ + /* within the band is updated to make sure that writes */ + /* invalidating the entry clear the metadata as well */ + if (entry->lba != FTL_LBA_INVALID) { + ftl_band_set_addr(wptr->band, entry->lba, entry->ppa); + } + + ftl_rwb_entry_set_valid(entry); + + ftl_trace(rwb_pop, ftl_dev_trace(dev), entry); + ftl_update_rwb_stats(dev, entry); + + ppa = ftl_band_next_ppa(wptr->band, ppa, 1); + } + + SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa, + ftl_ppa_addr_pack(dev, wptr->ppa)); + + if (ftl_submit_write(wptr, io)) { + /* TODO: we need some recovery here */ + assert(0 && "Write submit failed"); + if (ftl_io_done(io)) { + ftl_io_free(io); + } + } + + return dev->xfer_size; +error: + ftl_rwb_batch_revert(batch); + return 0; +} + +static int +ftl_process_writes(struct spdk_ftl_dev *dev) +{ + struct ftl_wptr *wptr, *twptr; + size_t num_active = 0; + enum ftl_band_state state; + + LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) { + ftl_wptr_process_writes(wptr); + state = 
wptr->band->state; + + if (state != FTL_BAND_STATE_FULL && + state != FTL_BAND_STATE_CLOSING && + state != FTL_BAND_STATE_CLOSED) { + num_active++; + } + } + + if (num_active < 1) { + ftl_add_wptr(dev); + } + + return 0; +} + +static void +ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io) +{ + struct ftl_band *band; + + memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE); + + if (ftl_rwb_entry_weak(entry)) { + band = ftl_band_from_ppa(io->dev, io->ppa); + entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos); + } + + entry->trace = io->trace; + + if (entry->md) { + memcpy(entry->md, &entry->lba, sizeof(io->lba)); + } +} + +static int +ftl_rwb_fill(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + struct ftl_rwb_entry *entry; + struct ftl_ppa ppa = { .cached = 1 }; + int flags = ftl_rwb_flags_from_io(io); + uint64_t lba; + + for (; io->pos < io->lbk_cnt; ++io->pos) { + lba = ftl_io_current_lba(io); + if (lba == FTL_LBA_INVALID) { + ftl_io_update_iovec(io, 1); + continue; + } + + entry = ftl_acquire_entry(dev, flags); + if (!entry) { + return -EAGAIN; + } + + entry->lba = lba; + ftl_rwb_entry_fill(entry, io); + + ppa.offset = entry->pos; + + ftl_io_update_iovec(io, 1); + ftl_update_l2p(dev, entry, ppa); + + /* Needs to be done after L2P is updated to avoid race with */ + /* write completion callback when it's processed faster than */ + /* L2P is set in update_l2p(). */ + ftl_rwb_push(entry); + ftl_trace(rwb_fill, ftl_dev_trace(dev), io); + } + + ftl_io_complete(io); + return 0; +} + +int +ftl_current_limit(const struct spdk_ftl_dev *dev) +{ + return dev->limit; +} + +int +spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs) +{ + if (!dev || !attrs) { + return -EINVAL; + } + + attrs->uuid = dev->uuid; + attrs->lbk_cnt = dev->num_lbas; + attrs->lbk_size = FTL_BLOCK_SIZE; + attrs->range = dev->range; + + return 0; +} + +static void +_ftl_io_write(void *ctx) +{ + ftl_io_write((struct ftl_io *)ctx); +} + +int +ftl_io_write(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + + /* For normal IOs we just need to copy the data onto the rwb */ + if (!(io->flags & FTL_IO_MD)) { + return ftl_rwb_fill(io); + } + + /* Metadata has its own buffer, so it doesn't have to be copied; just */ + /* send it to the core thread and schedule the write immediately */ + if (ftl_check_core_thread(dev)) { + return ftl_submit_write(ftl_wptr_from_band(io->band), io); + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io); + + return 0; +} + +static int +_spdk_ftl_write(struct ftl_io *io) +{ + int rc; + + rc = ftl_io_write(io); + if (rc == -EAGAIN) { + spdk_thread_send_msg(spdk_io_channel_get_thread(io->ch), + _ftl_write, io); + return 0; + } + + if (rc) { + ftl_io_free(io); + } + + return rc; +} + +static void +_ftl_write(void *ctx) +{ + _spdk_ftl_write(ctx); +} + +int +spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_io *io; + + if (!iov || !cb_fn || !dev) { + return -EINVAL; + } + + if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) { + return -EINVAL; + } + + if (lba_cnt == 0) { + return -EINVAL; + } + + if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + io = ftl_io_alloc(ch); + if (!io) { + return -ENOMEM; + } + + ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE); + return
_spdk_ftl_write(io); +} + +int +ftl_io_read(struct ftl_io *io) +{ + struct spdk_ftl_dev *dev = io->dev; + ftl_next_ppa_fn next_ppa; + + if (ftl_check_read_thread(dev)) { + if (ftl_io_mode_ppa(io)) { + next_ppa = ftl_ppa_read_next_ppa; + } else { + next_ppa = ftl_lba_read_next_ppa; + } + + return ftl_submit_read(io, next_ppa, NULL); + } + + spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_read, io); + return 0; +} + +static void +_ftl_read(void *arg) +{ + ftl_io_read((struct ftl_io *)arg); +} + +int +spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt, + struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_io *io; + + if (!iov || !cb_fn || !dev) { + return -EINVAL; + } + + if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) { + return -EINVAL; + } + + if (lba_cnt == 0) { + return -EINVAL; + } + + if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + io = ftl_io_alloc(ch); + if (!io) { + return -ENOMEM; + } + + ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ); + return ftl_io_read(io); +} + +static struct ftl_flush * +ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_flush *flush; + struct ftl_rwb *rwb = dev->rwb; + + flush = calloc(1, sizeof(*flush)); + if (!flush) { + return NULL; + } + + flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb)); + if (!flush->bmap) { + goto error; + } + + flush->dev = dev; + flush->cb.fn = cb_fn; + flush->cb.ctx = cb_arg; + + return flush; +error: + free(flush); + return NULL; +} + +static void +_ftl_flush(void *ctx) +{ + struct ftl_flush *flush = ctx; + struct spdk_ftl_dev *dev = flush->dev; + struct ftl_rwb *rwb = dev->rwb; + struct ftl_rwb_batch *batch; + + /* Attach flush object to all non-empty batches */ + ftl_rwb_foreach_batch(batch, rwb) { + if (!ftl_rwb_batch_empty(batch)) { + spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch)); + flush->num_req++; + } + } + + LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry); + + /* If the RWB was already empty, the flush can be completed right away */ + if (!flush->num_req) { + ftl_complete_flush(flush); + } +} + +int +spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) +{ + struct ftl_flush *flush; + + if (!dev || !cb_fn) { + return -EINVAL; + } + + if (!dev->initialized) { + return -EBUSY; + } + + flush = ftl_flush_init(dev, cb_fn, cb_arg); + if (!flush) { + return -ENOMEM; + } + + spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush); + return 0; +} + +int +ftl_task_read(void *ctx) +{ + struct ftl_thread *thread = ctx; + struct spdk_ftl_dev *dev = thread->dev; + struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev); + + if (dev->halt) { + if (ftl_shutdown_complete(dev)) { + spdk_poller_unregister(&thread->poller); + return 0; + } + } + + return spdk_nvme_qpair_process_completions(qpair, 1); +} + +int +ftl_task_core(void *ctx) +{ + struct ftl_thread *thread = ctx; + struct spdk_ftl_dev *dev = thread->dev; + struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev); + + if (dev->halt) { + if (ftl_shutdown_complete(dev)) { + spdk_poller_unregister(&thread->poller); + return 0; + } + } + + ftl_process_writes(dev); + spdk_nvme_qpair_process_completions(qpair, 1); + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE) diff --git a/lib/ftl/ftl_core.h b/lib/ftl/ftl_core.h index 26d8d0aff..24fa46a5b 100644 --- 
a/lib/ftl/ftl_core.h +++ b/lib/ftl/ftl_core.h @@ -221,7 +221,7 @@ int ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa); int ftl_task_core(void *ctx); int ftl_task_read(void *ctx); size_t ftl_tail_md_num_lbks(const struct spdk_ftl_dev *dev); -size_t ftl_tail_md_hdr_num_lbks(const struct spdk_ftl_dev *dev); +size_t ftl_tail_md_hdr_num_lbks(void); size_t ftl_vld_map_num_lbks(const struct spdk_ftl_dev *dev); size_t ftl_lba_map_num_lbks(const struct spdk_ftl_dev *dev); size_t ftl_head_md_num_lbks(const struct spdk_ftl_dev *dev); diff --git a/lib/ftl/ftl_debug.c b/lib/ftl/ftl_debug.c new file mode 100644 index 000000000..ca64de39f --- /dev/null +++ b/lib/ftl/ftl_debug.c @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk_internal/log.h" +#include "spdk/ftl.h" +#include "ftl_debug.h" + +#if defined(DEBUG) +#if defined(FTL_META_DEBUG) + +static const char *ftl_band_state_str[] = { + "free", + "prep", + "opening", + "open", + "full", + "closing", + "closed", + "max" +}; + +bool +ftl_band_validate_md(struct ftl_band *band, const uint64_t *lba_map) +{ + struct spdk_ftl_dev *dev = band->dev; + struct ftl_md *md = &band->md; + struct ftl_ppa ppa_md, ppa_l2p; + size_t i, size; + bool valid = true; + + size = ftl_num_band_lbks(dev); + + pthread_spin_lock(&md->lock); + for (i = 0; i < size; ++i) { + if (!spdk_bit_array_get(md->vld_map, i)) { + continue; + } + + ppa_md = ftl_band_ppa_from_lbkoff(band, i); + ppa_l2p = ftl_l2p_get(dev, lba_map[i]); + + if (ppa_l2p.cached) { + continue; + } + + if (ppa_l2p.ppa != ppa_md.ppa) { + valid = false; + break; + } + + } + + pthread_spin_unlock(&md->lock); + + return valid; +} + +void +ftl_dev_dump_bands(struct spdk_ftl_dev *dev) +{ + size_t i, total = 0; + + if (!dev->bands) { + return; + } + + ftl_debug("Bands validity:\n"); + for (i = 0; i < ftl_dev_num_bands(dev); ++i) { + if (dev->bands[i].state == FTL_BAND_STATE_FREE && + dev->bands[i].md.wr_cnt == 0) { + continue; + } + + if (!dev->bands[i].num_chunks) { + ftl_debug(" Band %3zu: all chunks are offline\n", i + 1); + continue; + } + + total += dev->bands[i].md.num_vld; + ftl_debug(" Band %3zu: %8zu / %zu \tnum_chunks: %zu \twr_cnt: %"PRIu64"\tmerit:" + "%10.3f\tstate: %s\n", + i + 1, dev->bands[i].md.num_vld, + ftl_band_user_lbks(&dev->bands[i]), + dev->bands[i].num_chunks, + dev->bands[i].md.wr_cnt, + dev->bands[i].merit, + ftl_band_state_str[dev->bands[i].state]); + } +} + +#endif /* defined(FTL_META_DEBUG) */ + +#if defined(FTL_DUMP_STATS) + +void +ftl_dev_dump_stats(const struct spdk_ftl_dev *dev) +{ + size_t i, total = 0; + char uuid[SPDK_UUID_STRING_LEN]; + double waf; + const char *limits[] = { + [SPDK_FTL_LIMIT_CRIT] = "crit", + [SPDK_FTL_LIMIT_HIGH] = "high", + [SPDK_FTL_LIMIT_LOW] = "low", + [SPDK_FTL_LIMIT_START] = "start" + }; + + if (!dev->bands) { + return; + } + + /* Count the number of valid LBAs */ + for (i = 0; i < ftl_dev_num_bands(dev); ++i) { + total += dev->bands[i].md.num_vld; + } + + waf = (double)dev->stats.write_total / (double)dev->stats.write_user; + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), &dev->uuid); + ftl_debug("\n"); + ftl_debug("device UUID: %s\n", uuid); + ftl_debug("total valid LBAs: %zu\n", total); + ftl_debug("total writes: %"PRIu64"\n", dev->stats.write_total); + ftl_debug("user writes: %"PRIu64"\n", dev->stats.write_user); + ftl_debug("WAF: %.4lf\n", waf); + ftl_debug("limits:\n"); + for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) { + ftl_debug(" %5s: %"PRIu64"\n", limits[i], dev->stats.limits[i]); + } +} + +#endif /* defined(FTL_DUMP_STATS) */ +#endif /* defined(DEBUG) */ diff --git a/lib/ftl/ftl_debug.h b/lib/ftl/ftl_debug.h new file mode 100644 index 000000000..c46b3f842 --- /dev/null +++ b/lib/ftl/ftl_debug.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FTL_DEBUG_H +#define FTL_DEBUG_H + +#include "ftl_ppa.h" +#include "ftl_band.h" +#include "ftl_core.h" +#include "ftl_rwb.h" + +#if defined(DEBUG) +/* Debug flags - enabled when defined */ +#define FTL_META_DEBUG 1 +#define FTL_DUMP_STATS 1 + +#define ftl_debug(msg, ...) \ + fprintf(stderr, msg, ## __VA_ARGS__) +#else +#define ftl_debug(msg, ...) +#endif + +static inline const char * +ftl_ppa2str(struct ftl_ppa ppa, char *buf, size_t size) +{ + snprintf(buf, size, "(grp: %u, pu: %u, chk: %u, lbk: %u)", + ppa.grp, ppa.pu, ppa.chk, ppa.lbk); + return buf; +} + +#if defined(FTL_META_DEBUG) +bool ftl_band_validate_md(struct ftl_band *band, const uint64_t *lba_map); +void ftl_dev_dump_bands(struct spdk_ftl_dev *dev); +#else +#define ftl_band_validate_md(band, lba_map) +#define ftl_dev_dump_bands(dev) +#endif + +#if defined(FTL_DUMP_STATS) +void ftl_dev_dump_stats(const struct spdk_ftl_dev *dev); +#else +#define ftl_dev_dump_stats(dev) +#endif + +#endif /* FTL_DEBUG_H */ diff --git a/lib/ftl/ftl_io.c b/lib/ftl/ftl_io.c new file mode 100644 index 000000000..a93c00b48 --- /dev/null +++ b/lib/ftl/ftl_io.c @@ -0,0 +1,371 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/ftl.h" + +#include "ftl_io.h" +#include "ftl_core.h" +#include "ftl_rwb.h" +#include "ftl_band.h" + +size_t +ftl_io_inc_req(struct ftl_io *io) +{ + struct ftl_band *band = io->band; + + if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) { + ftl_band_acquire_md(band); + } + + __atomic_fetch_add(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST); + + return ++io->req_cnt; +} + +size_t +ftl_io_dec_req(struct ftl_io *io) +{ + struct ftl_band *band = io->band; + unsigned long num_inflight __attribute__((unused)); + + if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) { + ftl_band_release_md(band); + } + + num_inflight = __atomic_fetch_sub(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST); + + assert(num_inflight > 0); + assert(io->req_cnt > 0); + + return --io->req_cnt; +} + +struct iovec * +ftl_io_iovec(struct ftl_io *io) +{ + if (io->iov_cnt > 1) { + return io->iovs; + } else { + return &io->iov; + } +} + +uint64_t +ftl_io_current_lba(struct ftl_io *io) +{ + if (io->flags & FTL_IO_VECTOR_LBA) { + return io->lbas[io->pos]; + } else { + return io->lba + io->pos; + } +} + +void +ftl_io_update_iovec(struct ftl_io *io, size_t lbk_cnt) +{ + struct iovec *iov = ftl_io_iovec(io); + size_t iov_lbks; + + while (lbk_cnt > 0) { + assert(io->iov_pos < io->iov_cnt); + iov_lbks = iov[io->iov_pos].iov_len / PAGE_SIZE; + + if (io->iov_off + lbk_cnt < iov_lbks) { + io->iov_off += lbk_cnt; + break; + } + + assert(iov_lbks > io->iov_off); + lbk_cnt -= (iov_lbks - io->iov_off); + io->iov_off = 0; + io->iov_pos++; + } +} + +size_t +ftl_iovec_num_lbks(struct iovec *iov, size_t iov_cnt) +{ + size_t lbks = 0, i = 0; + + for (; i < iov_cnt; ++i) { + lbks += iov[i].iov_len / PAGE_SIZE; + } + + return lbks; +} + +void * +ftl_io_iovec_addr(struct ftl_io *io) +{ + assert(io->iov_pos < io->iov_cnt); + assert(io->iov_off * PAGE_SIZE < ftl_io_iovec(io)[io->iov_pos].iov_len); + + return (char *)ftl_io_iovec(io)[io->iov_pos].iov_base + + io->iov_off * PAGE_SIZE; +} + +size_t +ftl_io_iovec_len_left(struct ftl_io *io) +{ + struct iovec *iov = ftl_io_iovec(io); + return iov[io->iov_pos].iov_len / PAGE_SIZE - io->iov_off; +} + +int +ftl_io_init_iovec(struct ftl_io *io, void *buf, + size_t iov_cnt, size_t req_size) +{ + struct iovec *iov; + size_t i; + + if (iov_cnt > 1) { + iov = io->iovs = calloc(iov_cnt, sizeof(*iov)); + if (!iov) { + return -ENOMEM; + } + } else { + iov = &io->iov; + } + + io->iov_pos = 0; + io->iov_cnt = iov_cnt; + for (i = 0; i < iov_cnt; ++i) { + iov[i].iov_base = (char *)buf + i * req_size * PAGE_SIZE; + iov[i].iov_len = req_size * PAGE_SIZE; + } + + return 0; +} + +static void +ftl_io_init(struct ftl_io *io, struct spdk_ftl_dev *dev, + spdk_ftl_fn fn, void *ctx, int flags, int type) +{ + io->flags |= flags | FTL_IO_INITIALIZED; + io->type = type; + io->dev = dev; + io->lba = FTL_LBA_INVALID; + io->cb.fn = fn; + io->cb.ctx = ctx; + io->trace = ftl_trace_alloc_group(dev->stats.trace); +} + +struct ftl_io * 
+ftl_io_init_internal(const struct ftl_io_init_opts *opts) +{ + struct ftl_io *io = opts->io; + struct spdk_ftl_dev *dev = opts->dev; + + if (!io) { + io = ftl_io_alloc(dev->ioch); + if (!io) { + return NULL; + } + } + + ftl_io_clear(io); + ftl_io_init(io, dev, opts->fn, io, opts->flags | FTL_IO_INTERNAL, opts->type); + + io->lbk_cnt = opts->iov_cnt * opts->req_size; + io->rwb_batch = opts->rwb_batch; + io->band = opts->band; + io->md = opts->md; + + if (ftl_io_init_iovec(io, opts->data, opts->iov_cnt, opts->req_size)) { + if (!opts->io) { + ftl_io_free(io); + } + return NULL; + } + + return io; +} + +struct ftl_io * +ftl_io_rwb_init(struct spdk_ftl_dev *dev, struct ftl_band *band, + struct ftl_rwb_batch *batch, spdk_ftl_fn cb) +{ + struct ftl_io_init_opts opts = { + .dev = dev, + .io = NULL, + .rwb_batch = batch, + .band = band, + .size = sizeof(struct ftl_io), + .flags = 0, + .type = FTL_IO_WRITE, + .iov_cnt = 1, + .req_size = dev->xfer_size, + .fn = cb, + .data = ftl_rwb_batch_get_data(batch), + .md = ftl_rwb_batch_get_md(batch), + }; + + return ftl_io_init_internal(&opts); +} + +struct ftl_io * +ftl_io_erase_init(struct ftl_band *band, size_t lbk_cnt, spdk_ftl_fn cb) +{ + struct ftl_io *io; + struct ftl_io_init_opts opts = { + .dev = band->dev, + .io = NULL, + .rwb_batch = NULL, + .band = band, + .size = sizeof(struct ftl_io), + .flags = FTL_IO_PPA_MODE, + .type = FTL_IO_ERASE, + .iov_cnt = 0, + .req_size = 1, + .fn = cb, + .data = NULL, + .md = NULL, + }; + + io = ftl_io_init_internal(&opts); + io->lbk_cnt = lbk_cnt; + + return io; +} + +void +ftl_io_user_init(struct spdk_ftl_dev *dev, struct ftl_io *io, uint64_t lba, size_t lbk_cnt, + struct iovec *iov, size_t iov_cnt, + spdk_ftl_fn cb_fn, void *cb_arg, int type) +{ + if (io->flags & FTL_IO_INITIALIZED) { + return; + } + + ftl_io_init(io, dev, cb_fn, cb_arg, 0, type); + + io->lba = lba; + io->lbk_cnt = lbk_cnt; + io->iov_cnt = iov_cnt; + + if (iov_cnt > 1) { + io->iovs = iov; + } else { + io->iov = *iov; + } + + ftl_trace(lba_io_init, ftl_dev_trace(io->dev), io); +} + +void +ftl_io_complete(struct ftl_io *io) +{ + int keep_alive = io->flags & FTL_IO_KEEP_ALIVE; + + io->flags &= ~FTL_IO_INITIALIZED; + io->cb.fn(io->cb.ctx, io->status); + + if (!keep_alive) { + ftl_io_free(io); + } +} + +void +ftl_io_process_error(struct ftl_io *io, const struct spdk_nvme_cpl *status) +{ + io->status = -EIO; + + /* TODO: add error handling for specific cases */ + if (status->status.sct == SPDK_NVME_SCT_MEDIA_ERROR && + status->status.sc == SPDK_OCSSD_SC_READ_HIGH_ECC) { + io->status = 0; + } +} + +void * +ftl_io_get_md(const struct ftl_io *io) +{ + if (!io->md) { + return NULL; + } + + return (char *)io->md + io->pos * FTL_BLOCK_SIZE; +} + +struct ftl_io * +ftl_io_alloc(struct spdk_io_channel *ch) +{ + struct ftl_io *io; + struct ftl_io_channel *ioch = spdk_io_channel_get_ctx(ch); + + io = spdk_mempool_get(ioch->io_pool); + if (!io) { + return NULL; + } + + memset(io, 0, ioch->elem_size); + io->ch = ch; + return io; +} + +void +ftl_io_reinit(struct ftl_io *io, spdk_ftl_fn fn, void *ctx, int flags, int type) +{ + ftl_io_clear(io); + ftl_io_init(io, io->dev, fn, ctx, flags, type); +} + +void +ftl_io_clear(struct ftl_io *io) +{ + io->pos = 0; + io->req_cnt = 0; + io->iov_pos = 0; + io->iov_off = 0; + io->flags = 0; + io->rwb_batch = NULL; + io->band = NULL; +} + +void +ftl_io_free(struct ftl_io *io) +{ + struct ftl_io_channel *ioch; + + if (!io) { + return; + } + + if ((io->flags & FTL_IO_INTERNAL) && io->iov_cnt > 1) { + free(io->iovs); + } + + ioch =
spdk_io_channel_get_ctx(io->ch); + spdk_mempool_put(ioch->io_pool, io); +} diff --git a/lib/ftl/ftl_io.h b/lib/ftl/ftl_io.h index b5f0f9409..afa293251 100644 --- a/lib/ftl/ftl_io.h +++ b/lib/ftl/ftl_io.h @@ -34,9 +34,10 @@ #ifndef FTL_IO_H #define FTL_IO_H -#include -#include -#include +#include "spdk/stdinc.h" +#include "spdk/nvme.h" +#include "spdk/ftl.h" + #include "ftl_ppa.h" #include "ftl_trace.h" @@ -250,8 +251,6 @@ void *ftl_io_iovec_addr(struct ftl_io *io); size_t ftl_io_iovec_len_left(struct ftl_io *io); int ftl_io_init_iovec(struct ftl_io *io, void *buf, size_t iov_cnt, size_t req_size); -void ftl_io_init(struct ftl_io *io, struct spdk_ftl_dev *dev, - spdk_ftl_fn cb, void *ctx, int flags, int type); struct ftl_io *ftl_io_init_internal(const struct ftl_io_init_opts *opts); struct ftl_io *ftl_io_rwb_init(struct spdk_ftl_dev *dev, struct ftl_band *band, struct ftl_rwb_batch *entry, spdk_ftl_fn cb); diff --git a/lib/ftl/ftl_ppa.h b/lib/ftl/ftl_ppa.h index c4e11372f..6c620ab75 100644 --- a/lib/ftl/ftl_ppa.h +++ b/lib/ftl/ftl_ppa.h @@ -34,7 +34,7 @@ #ifndef FTL_PPA_H #define FTL_PPA_H -#include +#include "spdk/stdinc.h" /* Marks PPA as invalid */ #define FTL_PPA_INVALID (-1) diff --git a/lib/ftl/ftl_rwb.c b/lib/ftl/ftl_rwb.c new file mode 100644 index 000000000..478f84a14 --- /dev/null +++ b/lib/ftl/ftl_rwb.c @@ -0,0 +1,461 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#include "ftl_rwb.h" +#include "ftl_core.h" + +struct ftl_rwb_batch { + /* Parent RWB */ + struct ftl_rwb *rwb; + + /* Position within RWB */ + unsigned int pos; + + /* Number of acquired entries */ + unsigned int num_acquired; + + /* Number of entries ready for submission */ + unsigned int num_ready; + + /* RWB entry list */ + LIST_HEAD(, ftl_rwb_entry) entry_list; + + /* Entry buffer */ + struct ftl_rwb_entry *entries; + + /* Data buffer */ + void *buffer; + + /* Metadata buffer */ + void *md_buffer; + + /* Queue entry */ + STAILQ_ENTRY(ftl_rwb_batch) stailq; +}; + +struct ftl_rwb { + /* Number of batches */ + size_t num_batches; + + /* Number of entries per batch */ + size_t xfer_size; + + /* Metadata's size */ + size_t md_size; + + /* Number of acquired entries */ + unsigned int num_acquired[FTL_RWB_TYPE_MAX]; + + /* User/internal limits */ + size_t limits[FTL_RWB_TYPE_MAX]; + + /* Current batch */ + struct ftl_rwb_batch *current; + + /* Free batch queue */ + STAILQ_HEAD(, ftl_rwb_batch) free_queue; + + /* Submission batch queue */ + struct spdk_ring *submit_queue; + + /* Batch buffer */ + struct ftl_rwb_batch *batches; + + /* RWB lock */ + pthread_spinlock_t lock; +}; + +static int +ftl_rwb_batch_full(const struct ftl_rwb_batch *batch, size_t batch_size) +{ + struct ftl_rwb *rwb = batch->rwb; + assert(batch_size <= rwb->xfer_size); + return batch_size == rwb->xfer_size; +} + +static void +ftl_rwb_batch_init_entry(struct ftl_rwb_batch *batch, size_t pos) +{ + struct ftl_rwb *rwb = batch->rwb; + struct ftl_rwb_entry *entry, *prev; + size_t batch_offset = pos % rwb->xfer_size; + + entry = &batch->entries[batch_offset]; + entry->pos = pos; + entry->data = ((char *)batch->buffer) + FTL_BLOCK_SIZE * batch_offset; + entry->md = rwb->md_size ? 
((char *)batch->md_buffer) + rwb->md_size * batch_offset : NULL; + entry->batch = batch; + entry->rwb = batch->rwb; + pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE); + + if (batch_offset > 0) { + prev = &batch->entries[batch_offset - 1]; + LIST_INSERT_AFTER(prev, entry, list_entry); + } else { + LIST_INSERT_HEAD(&batch->entry_list, entry, list_entry); + } +} + +static int +ftl_rwb_batch_init(struct ftl_rwb *rwb, struct ftl_rwb_batch *batch, unsigned int pos) +{ + size_t md_size, i; + + md_size = spdk_divide_round_up(rwb->md_size * rwb->xfer_size, FTL_BLOCK_SIZE) * + FTL_BLOCK_SIZE; + batch->rwb = rwb; + batch->pos = pos; + + batch->entries = calloc(rwb->xfer_size, sizeof(*batch->entries)); + if (!batch->entries) { + return -1; + } + + batch->buffer = spdk_dma_zmalloc(FTL_BLOCK_SIZE * rwb->xfer_size, + FTL_BLOCK_SIZE, NULL); + if (!batch->buffer) { + goto error; + } + + if (md_size > 0) { + batch->md_buffer = spdk_dma_zmalloc(md_size, FTL_BLOCK_SIZE, NULL); + if (!batch->md_buffer) { + goto error; + } + } + + LIST_INIT(&batch->entry_list); + + for (i = 0; i < rwb->xfer_size; ++i) { + ftl_rwb_batch_init_entry(batch, pos * rwb->xfer_size + i); + } + + return 0; +error: + free(batch->entries); + spdk_dma_free(batch->buffer); + return -1; +} + +struct ftl_rwb * +ftl_rwb_init(const struct spdk_ftl_conf *conf, size_t xfer_size, size_t md_size) +{ + struct ftl_rwb *rwb; + struct ftl_rwb_batch *batch; + size_t ring_size, i; + + rwb = calloc(1, sizeof(*rwb)); + if (!rwb) { + goto error; + } + + assert(conf->rwb_size % xfer_size == 0); + + rwb->xfer_size = xfer_size; + rwb->md_size = md_size; + rwb->num_batches = conf->rwb_size / (FTL_BLOCK_SIZE * xfer_size); + + ring_size = spdk_align32pow2(rwb->num_batches); + + rwb->batches = calloc(rwb->num_batches, sizeof(*rwb->batches)); + if (!rwb->batches) { + goto error; + } + + rwb->submit_queue = spdk_ring_create(SPDK_RING_TYPE_MP_SC, ring_size, + SPDK_ENV_SOCKET_ID_ANY); + if (!rwb->submit_queue) { + SPDK_ERRLOG("Failed to create submission queue\n"); + goto error; + } + + /* TODO: use rte_ring with SP / MC */ + STAILQ_INIT(&rwb->free_queue); + + for (i = 0; i < rwb->num_batches; ++i) { + batch = &rwb->batches[i]; + + if (ftl_rwb_batch_init(rwb, batch, i)) { + SPDK_ERRLOG("Failed to initialize RWB entry buffer\n"); + goto error; + } + + STAILQ_INSERT_TAIL(&rwb->free_queue, batch, stailq); + } + + for (unsigned int i = 0; i < FTL_RWB_TYPE_MAX; ++i) { + rwb->limits[i] = ftl_rwb_entry_cnt(rwb); + } + + pthread_spin_init(&rwb->lock, PTHREAD_PROCESS_PRIVATE); + return rwb; +error: + ftl_rwb_free(rwb); + return NULL; +} + +void +ftl_rwb_free(struct ftl_rwb *rwb) +{ + struct ftl_rwb_entry *entry; + struct ftl_rwb_batch *batch; + + if (!rwb) { + return; + } + + for (size_t i = 0; i < rwb->num_batches; ++i) { + batch = &rwb->batches[i]; + + ftl_rwb_foreach(entry, batch) { + pthread_spin_destroy(&entry->lock); + } + + spdk_dma_free(batch->buffer); + spdk_dma_free(batch->md_buffer); + free(batch->entries); + } + + pthread_spin_destroy(&rwb->lock); + spdk_ring_free(rwb->submit_queue); + free(rwb->batches); + free(rwb); +} + +void +ftl_rwb_batch_release(struct ftl_rwb_batch *batch) +{ + struct ftl_rwb *rwb = batch->rwb; + struct ftl_rwb_entry *entry; + unsigned int num_acquired __attribute__((unused)); + + batch->num_ready = 0; + batch->num_acquired = 0; + + ftl_rwb_foreach(entry, batch) { + num_acquired = __atomic_fetch_sub(&rwb->num_acquired[ftl_rwb_entry_type(entry)], 1, + __ATOMIC_SEQ_CST); + assert(num_acquired > 0); + } + + 
pthread_spin_lock(&rwb->lock); + STAILQ_INSERT_TAIL(&rwb->free_queue, batch, stailq); + pthread_spin_unlock(&rwb->lock); +} + +size_t +ftl_rwb_entry_cnt(const struct ftl_rwb *rwb) +{ + return rwb->num_batches * rwb->xfer_size; +} + +size_t +ftl_rwb_num_batches(const struct ftl_rwb *rwb) +{ + return rwb->num_batches; +} + +size_t +ftl_rwb_batch_get_offset(const struct ftl_rwb_batch *batch) +{ + return batch->pos; +} + +void +ftl_rwb_set_limits(struct ftl_rwb *rwb, + const size_t limit[FTL_RWB_TYPE_MAX]) +{ + assert(limit[FTL_RWB_TYPE_USER] <= ftl_rwb_entry_cnt(rwb)); + assert(limit[FTL_RWB_TYPE_INTERNAL] <= ftl_rwb_entry_cnt(rwb)); + memcpy(rwb->limits, limit, sizeof(rwb->limits)); +} + +void +ftl_rwb_get_limits(struct ftl_rwb *rwb, + size_t limit[FTL_RWB_TYPE_MAX]) +{ + memcpy(limit, rwb->limits, sizeof(rwb->limits)); +} + +size_t +ftl_rwb_num_acquired(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + return __atomic_load_n(&rwb->num_acquired[type], __ATOMIC_SEQ_CST); +} + +void +ftl_rwb_batch_revert(struct ftl_rwb_batch *batch) +{ + struct ftl_rwb *rwb = batch->rwb; + + if (spdk_ring_enqueue(rwb->submit_queue, (void **)&batch, 1) != 1) { + assert(0 && "Should never happen"); + } +} + +void +ftl_rwb_push(struct ftl_rwb_entry *entry) +{ + struct ftl_rwb_batch *batch = entry->batch; + struct ftl_rwb *rwb = batch->rwb; + size_t batch_size; + + batch_size = __atomic_fetch_add(&batch->num_ready, 1, __ATOMIC_SEQ_CST) + 1; + + /* Once all of the entries are put back, push the batch on the */ + /* submission queue */ + if (ftl_rwb_batch_full(batch, batch_size)) { + if (spdk_ring_enqueue(rwb->submit_queue, (void **)&batch, 1) != 1) { + assert(0 && "Should never happen"); + } + } +} + +static int +ftl_rwb_check_limits(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + return ftl_rwb_num_acquired(rwb, type) >= rwb->limits[type]; +} + +struct ftl_rwb_entry * +ftl_rwb_acquire(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type) +{ + struct ftl_rwb_entry *entry = NULL; + struct ftl_rwb_batch *current; + + if (ftl_rwb_check_limits(rwb, type)) { + return NULL; + } + + pthread_spin_lock(&rwb->lock); + + current = rwb->current; + if (!current) { + current = STAILQ_FIRST(&rwb->free_queue); + if (!current) { + goto error; + } + + STAILQ_REMOVE(&rwb->free_queue, current, ftl_rwb_batch, stailq); + rwb->current = current; + } + + entry = &current->entries[current->num_acquired++]; + + /* If the whole batch is filled, clear the current batch pointer */ + if (current->num_acquired >= rwb->xfer_size) { + rwb->current = NULL; + } + + pthread_spin_unlock(&rwb->lock); + __atomic_fetch_add(&rwb->num_acquired[type], 1, __ATOMIC_SEQ_CST); + return entry; +error: + pthread_spin_unlock(&rwb->lock); + return NULL; +} + +struct ftl_rwb_batch * +ftl_rwb_pop(struct ftl_rwb *rwb) +{ + struct ftl_rwb_batch *batch = NULL; + + if (spdk_ring_dequeue(rwb->submit_queue, (void **)&batch, 1) != 1) { + return NULL; + } + + return batch; +} + +static struct ftl_rwb_batch * +_ftl_rwb_next_batch(struct ftl_rwb *rwb, size_t pos) +{ + if (pos >= rwb->num_batches) { + return NULL; + } + + return &rwb->batches[pos]; +} + +struct ftl_rwb_batch * +ftl_rwb_next_batch(struct ftl_rwb_batch *batch) +{ + return _ftl_rwb_next_batch(batch->rwb, batch->pos + 1); +} + +struct ftl_rwb_batch * +ftl_rwb_first_batch(struct ftl_rwb *rwb) +{ + return _ftl_rwb_next_batch(rwb, 0); +} + +int +ftl_rwb_batch_empty(struct ftl_rwb_batch *batch) +{ + return __atomic_load_n(&batch->num_ready, __ATOMIC_SEQ_CST) == 0; +} + +void * +ftl_rwb_batch_get_data(struct
ftl_rwb_batch *batch) +{ + return batch->buffer; +} + +void * +ftl_rwb_batch_get_md(struct ftl_rwb_batch *batch) +{ + return batch->md_buffer; +} + +struct ftl_rwb_entry * +ftl_rwb_entry_from_offset(struct ftl_rwb *rwb, size_t offset) +{ + unsigned int b_off, e_off; + + b_off = offset / rwb->xfer_size; + e_off = offset % rwb->xfer_size; + + assert(b_off < rwb->num_batches); + + return &rwb->batches[b_off].entries[e_off]; +} + +struct ftl_rwb_entry * +ftl_rwb_batch_first_entry(struct ftl_rwb_batch *batch) +{ + return LIST_FIRST(&batch->entry_list); +} diff --git a/lib/ftl/ftl_rwb.h b/lib/ftl/ftl_rwb.h new file mode 100644 index 000000000..406bf5de6 --- /dev/null +++ b/lib/ftl/ftl_rwb.h @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef FTL_RWB_H +#define FTL_RWB_H + +#include "spdk/stdinc.h" +#include "spdk/queue.h" + +#include "ftl_io.h" +#include "ftl_ppa.h" +#include "ftl_trace.h" + +struct ftl_rwb; +struct spdk_ftl_conf; +struct ftl_rwb_batch; + +enum ftl_rwb_entry_type { + FTL_RWB_TYPE_INTERNAL, + FTL_RWB_TYPE_USER, + FTL_RWB_TYPE_MAX +}; + +/* Write buffer entry */ +struct ftl_rwb_entry { + /* Owner rwb */ + struct ftl_rwb *rwb; + + /* Batch containing the entry */ + struct ftl_rwb_batch *batch; + + /* Logical address */ + uint64_t lba; + + /* Physical address */ + struct ftl_ppa ppa; + + /* Position within the rwb's buffer */ + unsigned int pos; + + /* Data pointer */ + void *data; + + /* Metadata pointer */ + void *md; + + /* Data/state lock */ + pthread_spinlock_t lock; + + /* Flags */ + unsigned int flags; + + /* Indicates whether the entry is part of cache and is assigned a PPA */ + bool valid; + + /* Trace group id */ + ftl_trace_group_t trace; + + /* Batch list entry */ + LIST_ENTRY(ftl_rwb_entry) list_entry; +}; + +struct ftl_rwb *ftl_rwb_init(const struct spdk_ftl_conf *conf, size_t xfer_size, size_t md_size); +void ftl_rwb_free(struct ftl_rwb *rwb); +void ftl_rwb_batch_release(struct ftl_rwb_batch *batch); +void ftl_rwb_push(struct ftl_rwb_entry *entry); +size_t ftl_rwb_entry_cnt(const struct ftl_rwb *rwb); +void ftl_rwb_set_limits(struct ftl_rwb *rwb, const size_t limit[FTL_RWB_TYPE_MAX]); +void ftl_rwb_get_limits(struct ftl_rwb *rwb, size_t limit[FTL_RWB_TYPE_MAX]); +size_t ftl_rwb_num_acquired(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type); +size_t ftl_rwb_num_batches(const struct ftl_rwb *rwb); +struct ftl_rwb_entry *ftl_rwb_acquire(struct ftl_rwb *rwb, enum ftl_rwb_entry_type type); +struct ftl_rwb_batch *ftl_rwb_pop(struct ftl_rwb *rwb); +struct ftl_rwb_batch *ftl_rwb_first_batch(struct ftl_rwb *rwb); +struct ftl_rwb_batch *ftl_rwb_next_batch(struct ftl_rwb_batch *batch); +int ftl_rwb_batch_empty(struct ftl_rwb_batch *batch); +struct ftl_rwb_entry *ftl_rwb_entry_from_offset(struct ftl_rwb *rwb, size_t offset); +size_t ftl_rwb_batch_get_offset(const struct ftl_rwb_batch *batch); +void ftl_rwb_batch_revert(struct ftl_rwb_batch *batch); +struct ftl_rwb_entry *ftl_rwb_batch_first_entry(struct ftl_rwb_batch *batch); +void *ftl_rwb_batch_get_data(struct ftl_rwb_batch *batch); +void *ftl_rwb_batch_get_md(struct ftl_rwb_batch *batch); + +static inline void +_ftl_rwb_entry_set_valid(struct ftl_rwb_entry *entry, bool valid) +{ + __atomic_store_n(&entry->valid, valid, __ATOMIC_SEQ_CST); +} + +static inline void +ftl_rwb_entry_set_valid(struct ftl_rwb_entry *entry) +{ + _ftl_rwb_entry_set_valid(entry, true); +} + +static inline void +ftl_rwb_entry_invalidate(struct ftl_rwb_entry *entry) +{ + _ftl_rwb_entry_set_valid(entry, false); +} + +static inline int +ftl_rwb_entry_valid(struct ftl_rwb_entry *entry) +{ + return __atomic_load_n(&entry->valid, __ATOMIC_SEQ_CST); +} + +static inline enum ftl_rwb_entry_type +ftl_rwb_type_from_flags(int flags) { + return (flags & FTL_IO_INTERNAL) ? 
FTL_RWB_TYPE_INTERNAL : FTL_RWB_TYPE_USER; +} + +static inline enum ftl_rwb_entry_type +ftl_rwb_entry_type(const struct ftl_rwb_entry *entry) { + return ftl_rwb_type_from_flags(entry->flags); +} + +static inline int +ftl_rwb_entry_internal(const struct ftl_rwb_entry *entry) +{ + return ftl_rwb_entry_type(entry) == FTL_RWB_TYPE_INTERNAL; +} + +#define ftl_rwb_foreach(entry, batch) \ + for (entry = ftl_rwb_batch_first_entry(batch); \ + entry; entry = LIST_NEXT(entry, list_entry)) + +#define ftl_rwb_foreach_batch(batch, rwb) \ + for (batch = ftl_rwb_first_batch(rwb); batch; \ + batch = ftl_rwb_next_batch(batch)) + +#endif /* FTL_RWB_H */ diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk index dd16a0123..c15e0ea5e 100644 --- a/mk/spdk.modules.mk +++ b/mk/spdk.modules.mk @@ -66,6 +66,10 @@ BLOCKDEV_MODULES_LIST += bdev_pmem SYS_LIBS += -lpmemblk endif +ifeq ($(CONFIG_FTL),y) +BLOCKDEV_MODULES_LIST += ftl +endif + SOCK_MODULES_LIST = sock_posix ifeq ($(CONFIG_VPP),y) diff --git a/test/common/autotest_common.sh b/test/common/autotest_common.sh index 91b170230..0fad11fe8 100644 --- a/test/common/autotest_common.sh +++ b/test/common/autotest_common.sh @@ -60,6 +60,7 @@ fi : ${SPDK_RUN_UBSAN=1}; export SPDK_RUN_UBSAN : ${SPDK_RUN_INSTALLED_DPDK=1}; export SPDK_RUN_INSTALLED_DPDK : ${SPDK_TEST_CRYPTO=1}; export SPDK_TEST_CRYPTO +: ${SPDK_TEST_FTL=0}; export SPDK_TEST_FTL if [ -z "$DEPENDENCY_DIR" ]; then export DEPENDENCY_DIR=/home/sys_sgsw @@ -187,6 +188,10 @@ if [ ! -d "${DEPENDENCY_DIR}/nvme-cli" ]; then export SPDK_TEST_NVME_CLI=0 fi +if [ $SPDK_TEST_FTL -eq 1 ]; then + config_params+=' --with-ftl' +fi + export config_params if [ -z "$output_dir" ]; then