diff --git a/CHANGELOG.md b/CHANGELOG.md index 746c4f23c..89729c116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ For now we are using hard-coded PSK and only support TLS 1.3 Reserve space for used_cluster bitmap. The reserved space could be used for blobstore growing in the future. +Added `is_zeroes` operation to `spdk_bs_dev`. It allows detecting whether logical blocks are +backed by a zeroes device, so the copy-on-write flow can take a shortcut and skip copying from it. + ### lvol Add num_md_pages_per_cluster_ratio parameter to the bdev_lvol_create_lvstore RPC. diff --git a/include/spdk/blob.h b/include/spdk/blob.h index 9c45fdf30..1c56e1681 100644 --- a/include/spdk/blob.h +++ b/include/spdk/blob.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright (c) Intel Corporation. * All rights reserved. - * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ /** \file @@ -195,6 +195,8 @@ struct spdk_bs_dev { struct spdk_bdev *(*get_base_bdev)(struct spdk_bs_dev *dev); + bool (*is_zeroes)(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count); + uint64_t blockcnt; uint32_t blocklen; /* In bytes */ }; diff --git a/lib/blob/blob_bs_dev.c b/lib/blob/blob_bs_dev.c index 62783216e..145d5f8db 100644 --- a/lib/blob/blob_bs_dev.c +++ b/lib/blob/blob_bs_dev.c @@ -117,6 +117,24 @@ blob_bs_dev_destroy(struct spdk_bs_dev *bs_dev) spdk_blob_close(b->blob, blob_bs_dev_destroy_cpl, b); } +static bool +blob_bs_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) +{ + struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; + struct spdk_blob *blob = b->blob; + + assert(lba == bs_cluster_to_lba(blob->bs, bs_lba_to_cluster(blob->bs, lba))); + assert(lba_count == bs_dev_byte_to_lba(dev, blob->bs->cluster_sz)); + + if (bs_io_unit_is_allocated(blob, lba)) { + return false; + } + + assert(blob->back_bs_dev != NULL); + return blob->back_bs_dev->is_zeroes(blob->back_bs_dev, + bs_io_unit_to_back_dev_lba(blob, lba), + bs_io_unit_to_back_dev_lba(blob, lba_count)); +} struct spdk_bs_dev * bs_create_blob_bs_dev(struct spdk_blob *blob) @@ -142,6 +160,7 @@ bs_create_blob_bs_dev(struct spdk_blob *blob) b->bs_dev.readv_ext = blob_bs_dev_readv_ext; b->bs_dev.write_zeroes = blob_bs_dev_write_zeroes; b->bs_dev.unmap = blob_bs_dev_unmap; + b->bs_dev.is_zeroes = blob_bs_is_zeroes; b->blob = blob; return &b->bs_dev; diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index a900d17be..61cb9871d 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -2399,6 +2399,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob, struct spdk_blob_copy_cluster_ctx *ctx; uint32_t cluster_start_page; uint32_t cluster_number; + bool is_zeroes; int rc; ch = spdk_io_channel_get_ctx(_ch); @@ -2431,7 +2432,10 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob, ctx->new_cluster_page = ch->new_cluster_page; 
memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE); - if (blob->parent_id != SPDK_BLOBID_INVALID) { + is_zeroes = blob->back_bs_dev->is_zeroes(blob->back_bs_dev, + bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), + bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz)); + if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) { ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); if (!ctx->buf) { @@ -2472,7 +2476,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob, /* Queue the user op to block other incoming operations */ TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link); - if (blob->parent_id != SPDK_BLOBID_INVALID) { + if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) { /* Read cluster from backing device */ bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf, bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), diff --git a/lib/blob/zeroes.c b/lib/blob/zeroes.c index feb65304f..8342414ca 100644 --- a/lib/blob/zeroes.c +++ b/lib/blob/zeroes.c @@ -118,6 +118,12 @@ zeroes_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, assert(false); } +static bool +zeroes_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) +{ + return true; +} + static struct spdk_bs_dev g_zeroes_bs_dev = { .blockcnt = UINT64_MAX, .blocklen = 512, @@ -132,6 +138,7 @@ static struct spdk_bs_dev g_zeroes_bs_dev = { .writev_ext = zeroes_writev_ext, .write_zeroes = zeroes_write_zeroes, .unmap = zeroes_unmap, + .is_zeroes = zeroes_is_zeroes, }; struct spdk_bs_dev * diff --git a/lib/blobfs/Makefile b/lib/blobfs/Makefile index a7f31f16d..a9d1c2ca2 100644 --- a/lib/blobfs/Makefile +++ b/lib/blobfs/Makefile @@ -6,7 +6,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 6 +SO_VER := 7 SO_MINOR := 0 C_SRCS = blobfs.c tree.c diff --git a/module/blob/bdev/Makefile b/module/blob/bdev/Makefile index 70ef14e97..fa40d2259 100644 --- a/module/blob/bdev/Makefile +++ b/module/blob/bdev/Makefile @@ -6,7 +6,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 7 +SO_VER := 8 SO_MINOR := 0 C_SRCS = blob_bdev.c diff --git a/module/blob/bdev/blob_bdev.c b/module/blob/bdev/blob_bdev.c index ef3f498b0..ef153012b 100644 --- a/module/blob/bdev/blob_bdev.c +++ b/module/blob/bdev/blob_bdev.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright (c) Intel Corporation. * All rights reserved. + * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "spdk/stdinc.h" @@ -353,6 +354,12 @@ bdev_blob_get_base_bdev(struct spdk_bs_dev *bs_dev) return __get_bdev(bs_dev); } +static bool +bdev_blob_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) +{ + return false; +} + static void blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc) { @@ -377,6 +384,7 @@ blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc) b->bs_dev.write_zeroes = bdev_blob_write_zeroes; b->bs_dev.unmap = bdev_blob_unmap; b->bs_dev.get_base_bdev = bdev_blob_get_base_bdev; + b->bs_dev.is_zeroes = bdev_blob_is_zeroes; } int