From 18b1de97d872dcc605a9f3529aa3bb408f66d50e Mon Sep 17 00:00:00 2001
From: Konrad Sztyber
Date: Wed, 29 May 2019 12:42:56 +0200
Subject: [PATCH] lib/ftl: store metadata on non-volatile cache

Send the LBA along with the data block when mirroring writes to the
non-volatile cache. The metadata buffer is retrieved from the metadata
pool, so the maximum number of concurrent requests is limited by
nv_cache.max_request_cnt, while the number of blocks in a single
request is limited by nv_cache.max_request_size.

Change-Id: If260302d16039183fb0fe073ef7419947532cfab
Signed-off-by: Konrad Sztyber
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/458093
Reviewed-by: Mateusz Kozlowski
Reviewed-by: Wojciech Malikowski
Reviewed-by: Ben Walker
Reviewed-by: Darek Stojaczyk
Tested-by: SPDK CI Jenkins
---
 lib/ftl/ftl_core.c | 35 ++++++++++++++++++++++++++++++++---
 lib/ftl/ftl_init.c | 12 ++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/lib/ftl/ftl_core.c b/lib/ftl/ftl_core.c
index ca217fd4f..38fd9093c 100644
--- a/lib/ftl/ftl_core.c
+++ b/lib/ftl/ftl_core.c
@@ -952,6 +952,7 @@ static uint64_t
 ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
 {
 	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
 	uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;
 
 	cache_size = spdk_bdev_get_num_blocks(bdev);
@@ -962,6 +963,8 @@ ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
 	}
 
 	num_available = spdk_min(nv_cache->num_available, *num_lbks);
+	num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_size);
+
 	if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
 		*num_lbks = cache_size - nv_cache->current_addr;
 	} else {
@@ -998,6 +1001,7 @@ static void
 ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 {
 	struct ftl_io *io = cb_arg;
+	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
 
 	if (spdk_unlikely(!success)) {
 		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
@@ -1006,6 +1010,7 @@ ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 	ftl_io_dec_req(io);
 
 	if (ftl_io_done(io)) {
+		spdk_mempool_put(nv_cache->md_pool, io->md);
 		ftl_io_complete(io);
 	}
 
@@ -1018,21 +1023,23 @@ ftl_submit_nv_cache(void *ctx)
 	struct ftl_io *io = ctx;
 	struct spdk_ftl_dev *dev = io->dev;
 	struct spdk_thread *thread;
+	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
 	struct ftl_io_channel *ioch;
 	int rc;
 
 	ioch = spdk_io_channel_get_ctx(io->ioch);
 	thread = spdk_io_channel_get_thread(io->ioch);
 
-	rc = spdk_bdev_write_blocks(dev->nv_cache.bdev_desc, ioch->cache_ioch,
-				    ftl_io_iovec_addr(io), io->ppa.ppa, io->lbk_cnt,
-				    ftl_nv_cache_submit_cb, io);
+	rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
+					    ftl_io_iovec_addr(io), io->md, io->ppa.ppa,
+					    io->lbk_cnt, ftl_nv_cache_submit_cb, io);
 	if (rc == -ENOMEM) {
 		spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
 		return;
 	} else if (rc) {
 		SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
 			    spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
+		spdk_mempool_put(nv_cache->md_pool, io->md);
 		io->status = -EIO;
 		ftl_io_complete(io);
 		return;
@@ -1042,6 +1049,19 @@ ftl_submit_nv_cache(void *ctx)
 	ftl_io_inc_req(io);
 }
 
+static void
+ftl_nv_cache_fill_md(struct ftl_nv_cache *nv_cache, struct ftl_io *io)
+{
+	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+	void *md_buf = io->md;
+	size_t lbk_off;
+
+	for (lbk_off = 0; lbk_off < io->lbk_cnt; ++lbk_off) {
+		*(uint64_t *)md_buf = ftl_io_get_lba(io, lbk_off);
+		md_buf = (char *)md_buf + spdk_bdev_get_md_size(bdev);
+	}
+}
+
 static void
 _ftl_write_nv_cache(void *ctx)
 {
@@ -1061,9 +1081,17 @@
 			return;
 		}
 
+		child->md = spdk_mempool_get(dev->nv_cache.md_pool);
+		if (spdk_unlikely(!child->md)) {
+			ftl_io_free(child);
+			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
+			break;
+		}
+
 		/* Reserve area on the write buffer cache */
 		child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks);
 		if (child->ppa.ppa == FTL_LBA_INVALID) {
+			spdk_mempool_put(dev->nv_cache.md_pool, child->md);
 			ftl_io_free(child);
 			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
 			break;
@@ -1074,6 +1102,7 @@
 			ftl_io_shrink_iovec(child, num_lbks);
 		}
 
+		ftl_nv_cache_fill_md(&dev->nv_cache, child);
 		ftl_submit_nv_cache(child);
 	}
 
diff --git a/lib/ftl/ftl_init.c b/lib/ftl/ftl_init.c
index 90fb60f3e..abad660ae 100644
--- a/lib/ftl/ftl_init.c
+++ b/lib/ftl/ftl_init.c
@@ -494,6 +494,18 @@ ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc
 		return -1;
 	}
 
+	if (!spdk_bdev_is_md_separate(bdev)) {
+		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
+			    spdk_bdev_get_name(bdev));
+		return -1;
+	}
+
+	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
+		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
+			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
+		return -1;
+	}
+
 	/* The cache needs to be capable of storing at least two full bands. This requirement comes
 	 * from the fact that cache works as a protection against power loss, so before the data
 	 * inside the cache can be overwritten, the band it's stored on has to be closed.
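
Note: the ftl_init.c hunk above only adds the checks that the cache bdev exposes a
separate metadata buffer large enough to hold an LBA; the creation of the
nv_cache.md_pool referenced in the commit message is not part of this excerpt. The
sketch below is illustrative only and shows how such a pool could be sized so that
one element holds a single request's worth of per-block metadata; the function name,
pool name, and error handling are assumptions, not taken from the patch.

    #include "spdk/bdev.h"
    #include "spdk/env.h"

    /* Illustrative sketch (not part of the patch): allocate the metadata pool
     * used by the non-volatile cache write path.  max_request_cnt elements
     * bound the number of concurrent requests, and each element is big enough
     * for max_request_size blocks' worth of per-block metadata (one LBA per
     * block in this scheme). */
    static struct spdk_mempool *
    example_create_nv_cache_md_pool(struct spdk_bdev *cache_bdev,
                                    size_t max_request_cnt, size_t max_request_size)
    {
            size_t md_size = spdk_bdev_get_md_size(cache_bdev);

            /* One element = metadata for max_request_size blocks. */
            return spdk_mempool_create("example_nv_md_pool", max_request_cnt,
                                       md_size * max_request_size,
                                       SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
                                       SPDK_ENV_SOCKET_ID_ANY);
    }

With a pool like this in place, _ftl_write_nv_cache() takes an element per child
request, ftl_nv_cache_fill_md() stamps each block's LBA into it, ftl_submit_nv_cache()
passes it as the md argument of spdk_bdev_write_blocks_with_md(), and
ftl_nv_cache_submit_cb() returns it to the pool once the mirror write completes.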