lib/ftl: store metadata on non-volatile cache

Send LBA along with the data block when mirroring writes to the
non-volatile cache. The metadata buffer is retrieved from the metadata
pool, so the maximum number of concurrent requests is limited to
nv_cache.max_request_cnt, while the number of blocks in a single request
is limited by nv_cache.max_request_size.

Change-Id: If260302d16039183fb0fe073ef7419947532cfab
Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/458093
Reviewed-by: Mateusz Kozlowski <mateusz.kozlowski@intel.com>
Reviewed-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Konrad Sztyber 2019-05-29 12:42:56 +02:00 committed by Darek Stojaczyk
parent 11ff1f4a2b
commit 18b1de97d8
2 changed files with 44 additions and 3 deletions

View File

@ -952,6 +952,7 @@ static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;
cache_size = spdk_bdev_get_num_blocks(bdev);
@ -962,6 +963,8 @@ ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
}
num_available = spdk_min(nv_cache->num_available, *num_lbks);
num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_cnt);
if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
*num_lbks = cache_size - nv_cache->current_addr;
} else {
@ -998,6 +1001,7 @@ static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct ftl_io *io = cb_arg;
struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
if (spdk_unlikely(!success)) {
SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
@ -1006,6 +1010,7 @@ ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
ftl_io_dec_req(io);
if (ftl_io_done(io)) {
spdk_mempool_put(nv_cache->md_pool, io->md);
ftl_io_complete(io);
}
@ -1018,21 +1023,23 @@ ftl_submit_nv_cache(void *ctx)
struct ftl_io *io = ctx;
struct spdk_ftl_dev *dev = io->dev;
struct spdk_thread *thread;
struct ftl_nv_cache *nv_cache = &dev->nv_cache;
struct ftl_io_channel *ioch;
int rc;
ioch = spdk_io_channel_get_ctx(io->ioch);
thread = spdk_io_channel_get_thread(io->ioch);
rc = spdk_bdev_write_blocks(dev->nv_cache.bdev_desc, ioch->cache_ioch,
ftl_io_iovec_addr(io), io->ppa.ppa, io->lbk_cnt,
ftl_nv_cache_submit_cb, io);
rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
ftl_io_iovec_addr(io), io->md, io->ppa.ppa,
io->lbk_cnt, ftl_nv_cache_submit_cb, io);
if (rc == -ENOMEM) {
spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
return;
} else if (rc) {
SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
spdk_mempool_put(nv_cache->md_pool, io->md);
io->status = -EIO;
ftl_io_complete(io);
return;
@ -1042,6 +1049,19 @@ ftl_submit_nv_cache(void *ctx)
ftl_io_inc_req(io);
}
/* Write the LBA of every data block into the request's separate metadata
 * buffer (io->md), one entry per block, spaced by the cache bdev's
 * per-block metadata size. This lets the LBA mapping be recovered from
 * the non-volatile cache after a power loss.
 *
 * Assumes io->md holds at least io->lbk_cnt metadata entries and that the
 * bdev's metadata size is >= sizeof(uint64_t) (both are validated when the
 * cache is initialized).
 */
static void
ftl_nv_cache_fill_md(struct ftl_nv_cache *nv_cache, struct ftl_io *io)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	size_t md_size = spdk_bdev_get_md_size(bdev);
	char *buf = io->md;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		/* Each metadata slot begins at a multiple of md_size */
		*(uint64_t *)(buf + i * md_size) = ftl_io_get_lba(io, i);
	}
}
static void
_ftl_write_nv_cache(void *ctx)
{
@ -1061,9 +1081,17 @@ _ftl_write_nv_cache(void *ctx)
return;
}
child->md = spdk_mempool_get(dev->nv_cache.md_pool);
if (spdk_unlikely(!child->md)) {
ftl_io_free(child);
spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
break;
}
/* Reserve area on the write buffer cache */
child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks);
if (child->ppa.ppa == FTL_LBA_INVALID) {
spdk_mempool_put(dev->nv_cache.md_pool, child->md);
ftl_io_free(child);
spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
break;
@ -1074,6 +1102,7 @@ _ftl_write_nv_cache(void *ctx)
ftl_io_shrink_iovec(child, num_lbks);
}
ftl_nv_cache_fill_md(&dev->nv_cache, child);
ftl_submit_nv_cache(child);
}

View File

@ -494,6 +494,18 @@ ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc
return -1;
}
if (!spdk_bdev_is_md_separate(bdev)) {
SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
spdk_bdev_get_name(bdev));
return -1;
}
if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
return -1;
}
/* The cache needs to be capable of storing at least two full bands. This requirement comes
* from the fact that cache works as a protection against power loss, so before the data
* inside the cache can be overwritten, the band it's stored on has to be closed.