From 59b75da71e520498b0376eae6e89856fcd6f3d51 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 5 Oct 2021 23:37:19 +0000 Subject: [PATCH] blob: use uint64_t for unmap and write_zeroes lba count Previous patches (5363eb3c) tried to work around the 32-bit unmap and write_zeroes LBA counts by breaking up larger operations into smaller chunks of max size UINT32_MAX lba chunks. But some SSDs may just ignore unmap operations that are not aligned to full physical block boundaries - and a UINT32_MAX lba unmap on a 512B logical / 4KiB physical SSD would not be aligned. If the SSD decided to ignore the unmap/deallocate (which it is allowed to do according to NVMe spec), we could end up with not unmapping *any* blocks. Probably SSDs should always be trying hard to unmap as many blocks as possible, but let's not try to depend on that in blobstore. So one option would be to break them into chunks close to UINT32_MAX which are still aligned to 4KiB boundaries. But the better fix is to just change the unmap and write_zeroes APIs to take 64-bit arguments, and then we can avoid the chunking altogether. Fixes issue #2190. Signed-off-by: Jim Harris Change-Id: I23998e493a764d466927c3520c7a8c7f943000a6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9737 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Xiaodong Liu Reviewed-by: Changpeng Liu Reviewed-by: Aleksey Marchuk Reviewed-by: Dong Yi Reviewed-by: Ben Walker Reviewed-by: Tomasz Zawadzki Tested-by: SPDK CI Jenkins --- include/spdk/blob.h | 4 +-- lib/blob/Makefile | 2 +- lib/blob/blob_bs_dev.c | 4 +-- lib/blob/blobstore.c | 45 ++++++++++++++---------------- lib/blob/request.c | 12 ++++---- lib/blob/request.h | 6 ++-- lib/blob/zeroes.c | 4 +-- lib/blobfs/Makefile | 2 +- lib/lvol/Makefile | 2 +- module/blob/bdev/blob_bdev.c | 4 +-- test/unit/lib/blob/bs_dev_common.c | 4 +-- 11 files changed, 43 insertions(+), 46 deletions(-) diff --git a/include/spdk/blob.h b/include/spdk/blob.h index 09375e88b..17081acf5 100644 --- a/include/spdk/blob.h +++ b/include/spdk/blob.h @@ -184,11 +184,11 @@ struct spdk_bs_dev { struct spdk_bs_dev_cb_args *cb_args); void (*write_zeroes)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args); void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args); struct spdk_bdev *(*get_base_bdev)(struct spdk_bs_dev *dev); diff --git a/lib/blob/Makefile b/lib/blob/Makefile index 19263199e..d206ca3e5 100644 --- a/lib/blob/Makefile +++ b/lib/blob/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 5 +SO_VER := 6 SO_MINOR := 0 C_SRCS = blobstore.c request.c zeroes.c blob_bs_dev.c diff --git a/lib/blob/blob_bs_dev.c b/lib/blob/blob_bs_dev.c index 8705a1c16..4375f946a 100644 --- a/lib/blob/blob_bs_dev.c +++ b/lib/blob/blob_bs_dev.c @@ -57,7 +57,7 @@ blob_bs_dev_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); @@ -66,7 +66,7 @@ blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channe static void blob_bs_dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index e3b4d7020..c3b35d3ab 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -1607,7 +1607,7 @@ struct spdk_blob_persist_ctx { static void bs_batch_clear_dev(struct spdk_blob_persist_ctx *ctx, spdk_bs_batch_t *batch, uint64_t lba, - uint32_t lba_count) + uint64_t lba_count) { switch (ctx->blob->clear_method) { case BLOB_CLEAR_WITH_DEFAULT: @@ -1715,7 +1715,7 @@ blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx struct spdk_blob_store *bs = blob->bs; size_t i; uint64_t lba; - uint32_t lba_count; + uint64_t lba_count; spdk_bs_batch_t *batch; batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx); @@ -1787,7 +1787,7 @@ blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ct spdk_bs_batch_t *batch; size_t i; uint64_t lba; - uint32_t lba_count; + uint64_t lba_count; /* Clusters don't move around in blobs. The list shrinks or grows * at the end, but no changes ever occur in the middle of the list. @@ -1800,7 +1800,7 @@ blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ct lba_count = 0; for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) { uint64_t next_lba = blob->active.clusters[i]; - uint32_t next_lba_count = bs_cluster_to_lba(bs, 1); + uint64_t next_lba_count = bs_cluster_to_lba(bs, 1); if (next_lba > 0 && (lba + lba_count) == next_lba) { /* This cluster is contiguous with the previous one. */ @@ -1873,7 +1873,7 @@ blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) struct spdk_blob *blob = ctx->blob; struct spdk_blob_store *bs = blob->bs; uint64_t lba; - uint32_t lba_count; + uint64_t lba_count; spdk_bs_batch_t *batch; size_t i; @@ -2499,7 +2499,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob, static inline bool blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length, - uint64_t *lba, uint32_t *lba_count) + uint64_t *lba, uint64_t *lba_count) { *lba_count = length; @@ -2624,7 +2624,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo { struct spdk_bs_cpl cpl; uint64_t lba; - uint32_t lba_count; + uint64_t lba_count; bool is_allocated; assert(blob != NULL); @@ -2894,7 +2894,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan * when the batch was completed, to allow for freeing the memory for the iov arrays. */ if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) { - uint32_t lba_count; + uint64_t lba_count; uint64_t lba; bool is_allocated; @@ -4952,22 +4952,19 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, bs_batch_write_zeroes_dev(batch, 0, num_md_lba); lba = num_md_lba; - while (lba < ctx->bs->dev->blockcnt) { - lba_count = spdk_min(UINT32_MAX, ctx->bs->dev->blockcnt - lba); - switch (opts.clear_method) { - case BS_CLEAR_WITH_UNMAP: - /* Trim data clusters */ - bs_batch_unmap_dev(batch, lba, lba_count); - break; - case BS_CLEAR_WITH_WRITE_ZEROES: - /* Write_zeroes to data clusters */ - bs_batch_write_zeroes_dev(batch, lba, lba_count); - break; - case BS_CLEAR_WITH_NONE: - default: - break; - } - lba += lba_count; + lba_count = ctx->bs->dev->blockcnt - lba; + switch (opts.clear_method) { + case BS_CLEAR_WITH_UNMAP: + /* Trim data clusters */ + bs_batch_unmap_dev(batch, lba, lba_count); + break; + case BS_CLEAR_WITH_WRITE_ZEROES: + /* Write_zeroes to data clusters */ + bs_batch_write_zeroes_dev(batch, lba, lba_count); + break; + case BS_CLEAR_WITH_NONE: + default: + break; } bs_batch_close(batch); diff --git a/lib/blob/request.c b/lib/blob/request.c index 6174f1f0e..bdd013285 100644 --- a/lib/blob/request.c +++ b/lib/blob/request.c @@ -231,13 +231,13 @@ bs_sequence_writev_dev(spdk_bs_sequence_t *seq, struct iovec *iov, int iovcnt, void bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, spdk_bs_sequence_cpl cb_fn, void *cb_arg) { struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; struct spdk_bs_channel *channel = set->channel; - SPDK_DEBUGLOG(blob_rw, "writing zeroes to %" PRIu32 " blocks at LBA %" PRIu64 "\n", + SPDK_DEBUGLOG(blob_rw, "writing zeroes to %" PRIu64 " blocks at LBA %" PRIu64 "\n", lba_count, lba); set->u.sequence.cb_fn = cb_fn; @@ -360,12 +360,12 @@ bs_batch_write_dev(spdk_bs_batch_t *batch, void *payload, void bs_batch_unmap_dev(spdk_bs_batch_t *batch, - uint64_t lba, uint32_t lba_count) + uint64_t lba, uint64_t lba_count) { struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; struct spdk_bs_channel *channel = set->channel; - SPDK_DEBUGLOG(blob_rw, "Unmapping %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count, + SPDK_DEBUGLOG(blob_rw, "Unmapping %" PRIu64 " blocks at LBA %" PRIu64 "\n", lba_count, lba); set->u.batch.outstanding_ops++; @@ -375,12 +375,12 @@ bs_batch_unmap_dev(spdk_bs_batch_t *batch, void bs_batch_write_zeroes_dev(spdk_bs_batch_t *batch, - uint64_t lba, uint32_t lba_count) + uint64_t lba, uint64_t lba_count) { struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; struct spdk_bs_channel *channel = set->channel; - SPDK_DEBUGLOG(blob_rw, "Zeroing %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count, lba); + SPDK_DEBUGLOG(blob_rw, "Zeroing %" PRIu64 " blocks at LBA %" PRIu64 "\n", lba_count, lba); set->u.batch.outstanding_ops++; channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count, diff --git a/lib/blob/request.h b/lib/blob/request.h index 81dc161db..5c98e8a0e 100644 --- a/lib/blob/request.h +++ b/lib/blob/request.h @@ -173,7 +173,7 @@ void bs_sequence_writev_dev(spdk_bs_batch_t *batch, struct iovec *iov, int iovcn spdk_bs_sequence_cpl cb_fn, void *cb_arg); void bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, spdk_bs_sequence_cpl cb_fn, void *cb_arg); void bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno); @@ -193,10 +193,10 @@ void bs_batch_write_dev(spdk_bs_batch_t *batch, void *payload, uint64_t lba, uint32_t lba_count); void bs_batch_unmap_dev(spdk_bs_batch_t *batch, - uint64_t lba, uint32_t lba_count); + uint64_t lba, uint64_t lba_count); void bs_batch_write_zeroes_dev(spdk_bs_batch_t *batch, - uint64_t lba, uint32_t lba_count); + uint64_t lba, uint64_t lba_count); void bs_batch_close(spdk_bs_batch_t *batch); diff --git a/lib/blob/zeroes.c b/lib/blob/zeroes.c index 5e8d70545..54112e2a2 100644 --- a/lib/blob/zeroes.c +++ b/lib/blob/zeroes.c @@ -85,7 +85,7 @@ zeroes_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void zeroes_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); @@ -94,7 +94,7 @@ zeroes_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void zeroes_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); diff --git a/lib/blobfs/Makefile b/lib/blobfs/Makefile index 04219fc46..5ee3d020e 100644 --- a/lib/blobfs/Makefile +++ b/lib/blobfs/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 4 +SO_VER := 5 SO_MINOR := 0 C_SRCS = blobfs.c tree.c diff --git a/lib/lvol/Makefile b/lib/lvol/Makefile index f67628f55..037268f69 100644 --- a/lib/lvol/Makefile +++ b/lib/lvol/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 4 +SO_VER := 5 SO_MINOR := 0 C_SRCS = lvol.c diff --git a/module/blob/bdev/blob_bdev.c b/module/blob/bdev/blob_bdev.c index 32c1a59d8..496231360 100644 --- a/module/blob/bdev/blob_bdev.c +++ b/module/blob/bdev/blob_bdev.c @@ -195,7 +195,7 @@ bdev_blob_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, - uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) + uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { int rc; @@ -211,7 +211,7 @@ bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, - uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) + uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { struct blob_bdev *blob_bdev = (struct blob_bdev *)dev; int rc; diff --git a/test/unit/lib/blob/bs_dev_common.c b/test/unit/lib/blob/bs_dev_common.c index 6774425fd..55e18acbf 100644 --- a/test/unit/lib/blob/bs_dev_common.c +++ b/test/unit/lib/blob/bs_dev_common.c @@ -314,7 +314,7 @@ dev_flush(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { uint64_t offset, length; @@ -344,7 +344,7 @@ dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, static void dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, - uint64_t lba, uint32_t lba_count, + uint64_t lba, uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { uint64_t offset, length;