lib/ftl: mirror writes to non-volatile cache

Apart from writing the data to OCSSD, mirror the latest two bands of
data on the persistent write buffer cache. Currently the data is only
sent there; further patches will add metadata support, shutdown recovery
and L2P updates.

Change-Id: Ief05d0c23fa0e25bd6085e0ce3e1528d6736d174
Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/450266
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Konrad Sztyber 2019-04-05 13:31:19 +02:00 committed by Jim Harris
parent aba66c0644
commit 36ff0eeb88
4 changed files with 185 additions and 8 deletions

View File

@ -36,6 +36,7 @@
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
@ -198,7 +199,10 @@ static void
ftl_md_write_cb(void *arg, int status)
{
struct ftl_io *io = arg;
struct spdk_ftl_dev *dev = io->dev;
struct ftl_nv_cache *nv_cache = &dev->nv_cache;
struct ftl_wptr *wptr;
struct spdk_bdev *bdev;
wptr = ftl_wptr_from_band(io->band);
@ -209,6 +213,18 @@ ftl_md_write_cb(void *arg, int status)
ftl_band_set_next_state(io->band);
if (io->band->state == FTL_BAND_STATE_CLOSED) {
if (nv_cache->bdev_desc) {
bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
pthread_spin_lock(&nv_cache->lock);
nv_cache->num_available += ftl_band_user_lbks(io->band);
if (spdk_unlikely(nv_cache->num_available > spdk_bdev_get_num_blocks(bdev))) {
nv_cache->num_available = spdk_bdev_get_num_blocks(bdev);
}
pthread_spin_unlock(&nv_cache->lock);
}
ftl_remove_wptr(wptr);
}
}
@ -853,6 +869,150 @@ ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
}
}
/*
 * Reserve up to *num_lbks blocks of the non-volatile cache bdev.
 *
 * On entry *num_lbks holds the requested size; on exit it holds the number of
 * blocks actually reserved (possibly clamped by remaining capacity and by the
 * distance to the end of the bdev, since a reservation never wraps).  Returns
 * the starting cache block address, or FTL_LBA_INVALID if the cache is full.
 * Protected by nv_cache->lock, so it is safe against concurrent reservations.
 */
static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	uint64_t total_blocks = spdk_bdev_get_num_blocks(bdev);
	uint64_t addr = FTL_LBA_INVALID;
	uint64_t avail;

	pthread_spin_lock(&nv_cache->lock);

	if (nv_cache->num_available != 0) {
		avail = spdk_min(nv_cache->num_available, *num_lbks);

		/* Clamp the reservation so it never runs past the end of the bdev */
		if (spdk_unlikely(nv_cache->current_addr + avail > total_blocks)) {
			*num_lbks = total_blocks - nv_cache->current_addr;
		} else {
			*num_lbks = avail;
		}

		addr = nv_cache->current_addr;
		nv_cache->current_addr += *num_lbks;
		nv_cache->num_available -= *num_lbks;

		/* Wrap the write pointer back to the start once the end is reached */
		if (nv_cache->current_addr == total_blocks) {
			nv_cache->current_addr = 0;
		}
	}

	pthread_spin_unlock(&nv_cache->lock);

	return addr;
}
/*
 * Allocate a child IO for mirroring num_lbks blocks of the parent's current
 * buffer position onto the non-volatile cache.  Returns NULL when no IO
 * object is available.
 */
static struct ftl_io *
ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_lbks)
{
	struct ftl_io_init_opts opts = { 0 };

	opts.dev = parent->dev;
	opts.parent = parent;
	opts.iov_cnt = 1;
	/* The child shares the parent's data buffer at its current offset */
	opts.data = ftl_io_iovec_addr(parent);
	opts.req_size = num_lbks;
	opts.flags = FTL_IO_CACHE;

	return ftl_io_init_internal(&opts);
}
/*
 * Completion callback for a non-volatile cache write.  Records failure on the
 * FTL IO, drops one outstanding request and completes the IO once all of its
 * requests have finished.
 */
static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;

	if (!success) {
		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
		io->status = -EIO;
	}

	ftl_io_dec_req(io);

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	/* The bdev IO is no longer needed regardless of the outcome */
	spdk_bdev_free_io(bdev_io);
}
/*
 * Issue the write of io's buffer to the non-volatile cache bdev at block
 * address io->ppa.ppa.  -ENOMEM from the bdev layer is retried by resending
 * this function as a message to the channel's thread; any other submission
 * error fails the IO with -EIO and completes it immediately.
 */
static void
ftl_submit_nv_cache(void *ctx)
{
struct ftl_io *io = ctx;
struct spdk_ftl_dev *dev = io->dev;
struct spdk_thread *thread;
struct ftl_io_channel *ioch;
int rc;
ioch = spdk_io_channel_get_ctx(io->ioch);
thread = spdk_io_channel_get_thread(io->ioch);
rc = spdk_bdev_write_blocks(dev->nv_cache.bdev_desc, ioch->cache_ioch,
ftl_io_iovec_addr(io), io->ppa.ppa, io->lbk_cnt,
ftl_nv_cache_submit_cb, io);
if (rc == -ENOMEM) {
/* No bdev IO available right now - retry later on the same thread */
spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
return;
} else if (rc) {
SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
io->status = -EIO;
ftl_io_complete(io);
return;
}
/* NOTE(review): progress and request accounting happen after submission;
 * this assumes ftl_nv_cache_submit_cb is never invoked inline from
 * spdk_bdev_write_blocks() - confirm against the bdev layer's contract.
 */
ftl_io_advance(io, io->lbk_cnt);
ftl_io_inc_req(io);
}
/*
 * Mirror the parent IO's buffer onto the non-volatile cache, carving it into
 * child IOs as cache space allows.  When a child IO cannot be allocated, or
 * no cache space is currently available, processing is rescheduled by sending
 * this function back to the IO channel's thread as a message.
 */
static void
_ftl_write_nv_cache(void *ctx)
{
struct ftl_io *child, *io = ctx;
struct spdk_ftl_dev *dev = io->dev;
struct spdk_thread *thread;
uint64_t num_lbks;
thread = spdk_io_channel_get_thread(io->ioch);
/* Loop until the whole parent buffer has been handed to child IOs */
while (io->pos < io->lbk_cnt) {
num_lbks = ftl_io_iovec_len_left(io);
child = ftl_alloc_io_nv_cache(io, num_lbks);
if (spdk_unlikely(!child)) {
/* No IO object available - retry the remainder later */
spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
return;
}
/* Reserve area on the write buffer cache */
child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks);
if (child->ppa.ppa == FTL_LBA_INVALID) {
/* Cache full - free the unused child and retry later.  break (not
 * return) so the ftl_io_done() check below still runs for work
 * already submitted.
 */
ftl_io_free(child);
spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
break;
}
/* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
if (spdk_unlikely(num_lbks != ftl_io_iovec_len_left(io))) {
ftl_io_shrink_iovec(child, ftl_io_iovec_addr(child), 1, num_lbks);
}
ftl_submit_nv_cache(child);
/* Account the reserved range as consumed on the parent */
ftl_io_advance(io, num_lbks);
}
if (ftl_io_done(io)) {
ftl_io_complete(io);
}
}
/*
 * Entry point for mirroring a finished write onto the non-volatile cache.
 * Marks the IO as cache-bound and rewinds its progress counters so the same
 * buffer can be replayed against the cache bdev.
 */
static void
ftl_write_nv_cache(struct ftl_io *parent)
{
	parent->flags |= FTL_IO_CACHE;
	ftl_io_reset(parent);

	_ftl_write_nv_cache(parent);
}
static void
ftl_write_fail(struct ftl_io *io, int status)
{
@ -1288,7 +1448,14 @@ ftl_rwb_fill(struct ftl_io *io)
ftl_rwb_push(entry);
}
if (ftl_io_done(io)) {
if (dev->nv_cache.bdev_desc) {
ftl_write_nv_cache(io);
} else {
ftl_io_complete(io);
}
}
return 0;
}

View File

@ -45,7 +45,7 @@ ftl_io_inc_req(struct ftl_io *io)
{
struct ftl_band *band = io->band;
if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
if (!(io->flags & FTL_IO_CACHE) && io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
ftl_band_acquire_md(band);
}
@ -60,7 +60,7 @@ ftl_io_dec_req(struct ftl_io *io)
struct ftl_band *band = io->band;
unsigned long num_inflight __attribute__((unused));
if (io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
if (!(io->flags & FTL_IO_CACHE) && io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
ftl_band_release_md(band);
}
@ -469,16 +469,20 @@ ftl_io_reinit(struct ftl_io *io, spdk_ftl_fn fn, void *ctx, int flags, int type)
/*
 * Reset an IO back to a pristine, reusable state: rewind its progress
 * counters and detach it from its flags, rwb batch and band.
 *
 * NOTE(review): this span appears to be diff residue - the individual field
 * resets below (pos/iov_pos/iov_off/done/req_cnt) duplicate what
 * ftl_io_reset() does; confirm against the post-patch file that only the
 * ftl_io_reset() call remains.
 */
void
ftl_io_clear(struct ftl_io *io)
{
io->pos = 0;
io->iov_pos = 0;
io->iov_off = 0;
io->done = false;
io->req_cnt = 0;
ftl_io_reset(io);
io->flags = 0;
io->rwb_batch = NULL;
io->band = NULL;
}
/*
 * Rewind an IO's progress tracking: outstanding request count, data position
 * and iovec cursors, and the completion flag.  Does not touch the IO's flags,
 * band or batch associations.
 */
void
ftl_io_reset(struct ftl_io *io)
{
	io->req_cnt = 0;
	io->pos = 0;
	io->iov_pos = 0;
	io->iov_off = 0;
	io->done = false;
}
void
ftl_io_free(struct ftl_io *io)
{

View File

@ -70,6 +70,8 @@ enum ftl_io_flags {
FTL_IO_VECTOR_LBA = (1 << 7),
/* Indicates that IO is being retried */
FTL_IO_RETRY = (1 << 8),
/* The IO is directed to non-volatile cache */
FTL_IO_CACHE = (1 << 9),
};
enum ftl_io_type {
@ -285,5 +287,6 @@ void *ftl_io_get_md(const struct ftl_io *io);
void ftl_io_complete(struct ftl_io *io);
void ftl_io_shrink_iovec(struct ftl_io *io, char *buf, size_t iov_cnt, size_t req_size);
void ftl_io_process_error(struct ftl_io *io, const struct spdk_nvme_cpl *status);
void ftl_io_reset(struct ftl_io *io);
#endif /* FTL_IO_H */

View File

@ -76,6 +76,8 @@ DEFINE_STUB(spdk_nvme_ocssd_ns_cmd_vector_reset, int, (struct spdk_nvme_ns *ns,
struct spdk_nvme_qpair *qpair, uint64_t *lba_list, uint32_t num_lbas,
struct spdk_ocssd_chunk_information_entry *chunk_info,
spdk_nvme_cmd_cb cb_fn, void *cb_arg), 0);
DEFINE_STUB(spdk_bdev_desc_get_bdev, struct spdk_bdev *, (struct spdk_bdev_desc *dsc), NULL);
DEFINE_STUB(spdk_bdev_get_num_blocks, uint64_t, (const struct spdk_bdev *bdev), 0);
struct ftl_io *
ftl_io_erase_init(struct ftl_band *band, size_t lbk_cnt, spdk_ftl_fn cb)
@ -157,6 +159,7 @@ test_wptr(void)
ftl_band_set_state(band, FTL_BAND_STATE_OPENING);
ftl_band_set_state(band, FTL_BAND_STATE_OPEN);
io.band = band;
io.dev = dev;
for (lbk = 0, offset = 0; lbk < ftl_dev_lbks_in_chunk(dev) / xfer_size; ++lbk) {
for (chunk = 0; chunk < band->num_chunks; ++chunk) {