blob: Optimize copy-on-write flow for clusters backed by zeroes device

Writing to an unallocated cluster triggers the copy-on-write sequence. If
this cluster is backed by the zeroes device, we can skip the copy part. For
a simple thin-provisioned volume this copy shortcut is already
implemented because `blob->parent_id == SPDK_BLOBID_INVALID`. But this
does not work for thin-provisioned volumes created from a snapshot. In
this case we need to traverse the whole stack of underlying
`spdk_bs_dev` devices for the specific cluster to check whether it is
backed by zeroes.

This patch adds an `is_zeroes` operation to `spdk_bs_dev`. For the zeroes
device it always returns true; for a real bdev (`blob_bdev`) it always
returns false; for another blob layer (`blob_bs_dev`) it returns false if
the cluster is allocated there, and otherwise does the lba conversion
and forwards the query to that blob's backing device.

In the blobstore's cluster copy flow we check whether the cluster is backed
by the zeroes device and, if it is, skip the copy part.

Signed-off-by: Evgeniy Kochetov <evgeniik@nvidia.com>
Change-Id: I640773ac78f8f466b96e96a34c3a6c3c91f87dab
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13446
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Evgeniy Kochetov 2022-06-22 16:55:46 +03:00 committed by Tomasz Zawadzki
parent ab58ddf107
commit 2e7a7fe530
8 changed files with 48 additions and 5 deletions

View File

@ -27,6 +27,9 @@ For now we are using hard-coded PSK and only support TLS 1.3
Reserve space for used_cluster bitmap. The reserved space could be used for blobstore growing Reserve space for used_cluster bitmap. The reserved space could be used for blobstore growing
in the future. in the future.
Added `is_zeroes` operation to `spdk_bs_dev`. It allows detecting whether logical blocks are backed
by the zeroes device, so that the copy-on-write flow can skip the copy step for such blocks.
### lvol ### lvol
Add num_md_pages_per_cluster_ratio parameter to the bdev_lvol_create_lvstore RPC. Add num_md_pages_per_cluster_ratio parameter to the bdev_lvol_create_lvstore RPC.

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause /* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation. * Copyright (c) Intel Corporation.
* All rights reserved. * All rights reserved.
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/ */
/** \file /** \file
@ -195,6 +195,8 @@ struct spdk_bs_dev {
struct spdk_bdev *(*get_base_bdev)(struct spdk_bs_dev *dev); struct spdk_bdev *(*get_base_bdev)(struct spdk_bs_dev *dev);
bool (*is_zeroes)(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count);
uint64_t blockcnt; uint64_t blockcnt;
uint32_t blocklen; /* In bytes */ uint32_t blocklen; /* In bytes */
}; };

View File

@ -117,6 +117,24 @@ blob_bs_dev_destroy(struct spdk_bs_dev *bs_dev)
spdk_blob_close(b->blob, blob_bs_dev_destroy_cpl, b); spdk_blob_close(b->blob, blob_bs_dev_destroy_cpl, b);
} }
static bool
blob_bs_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev;
struct spdk_blob *blob = b->blob;
assert(lba == bs_cluster_to_lba(blob->bs, bs_lba_to_cluster(blob->bs, lba)));
assert(lba_count == bs_dev_byte_to_lba(dev, blob->bs->cluster_sz));
if (bs_io_unit_is_allocated(blob, lba)) {
return false;
}
assert(blob->back_bs_dev != NULL);
return blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
bs_io_unit_to_back_dev_lba(blob, lba),
bs_io_unit_to_back_dev_lba(blob, lba_count));
}
struct spdk_bs_dev * struct spdk_bs_dev *
bs_create_blob_bs_dev(struct spdk_blob *blob) bs_create_blob_bs_dev(struct spdk_blob *blob)
@ -142,6 +160,7 @@ bs_create_blob_bs_dev(struct spdk_blob *blob)
b->bs_dev.readv_ext = blob_bs_dev_readv_ext; b->bs_dev.readv_ext = blob_bs_dev_readv_ext;
b->bs_dev.write_zeroes = blob_bs_dev_write_zeroes; b->bs_dev.write_zeroes = blob_bs_dev_write_zeroes;
b->bs_dev.unmap = blob_bs_dev_unmap; b->bs_dev.unmap = blob_bs_dev_unmap;
b->bs_dev.is_zeroes = blob_bs_is_zeroes;
b->blob = blob; b->blob = blob;
return &b->bs_dev; return &b->bs_dev;

View File

@ -2399,6 +2399,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
struct spdk_blob_copy_cluster_ctx *ctx; struct spdk_blob_copy_cluster_ctx *ctx;
uint32_t cluster_start_page; uint32_t cluster_start_page;
uint32_t cluster_number; uint32_t cluster_number;
bool is_zeroes;
int rc; int rc;
ch = spdk_io_channel_get_ctx(_ch); ch = spdk_io_channel_get_ctx(_ch);
@ -2431,7 +2432,10 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
ctx->new_cluster_page = ch->new_cluster_page; ctx->new_cluster_page = ch->new_cluster_page;
memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE); memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
if (blob->parent_id != SPDK_BLOBID_INVALID) { is_zeroes = blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
if (!ctx->buf) { if (!ctx->buf) {
@ -2472,7 +2476,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
/* Queue the user op to block other incoming operations */ /* Queue the user op to block other incoming operations */
TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link); TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
if (blob->parent_id != SPDK_BLOBID_INVALID) { if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
/* Read cluster from backing device */ /* Read cluster from backing device */
bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf, bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),

View File

@ -118,6 +118,12 @@ zeroes_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
assert(false); assert(false);
} }
/*
 * is_zeroes callback of the zeroes device: every range is reported as
 * zeroes-backed, so none of the arguments are inspected.
 */
static bool
zeroes_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
	(void)dev;
	(void)lba;
	(void)lba_count;

	return true;
}
static struct spdk_bs_dev g_zeroes_bs_dev = { static struct spdk_bs_dev g_zeroes_bs_dev = {
.blockcnt = UINT64_MAX, .blockcnt = UINT64_MAX,
.blocklen = 512, .blocklen = 512,
@ -132,6 +138,7 @@ static struct spdk_bs_dev g_zeroes_bs_dev = {
.writev_ext = zeroes_writev_ext, .writev_ext = zeroes_writev_ext,
.write_zeroes = zeroes_write_zeroes, .write_zeroes = zeroes_write_zeroes,
.unmap = zeroes_unmap, .unmap = zeroes_unmap,
.is_zeroes = zeroes_is_zeroes,
}; };
struct spdk_bs_dev * struct spdk_bs_dev *

View File

@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 6 SO_VER := 7
SO_MINOR := 0 SO_MINOR := 0
C_SRCS = blobfs.c tree.c C_SRCS = blobfs.c tree.c

View File

@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 7 SO_VER := 8
SO_MINOR := 0 SO_MINOR := 0
C_SRCS = blob_bdev.c C_SRCS = blob_bdev.c

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause /* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation. * Copyright (c) Intel Corporation.
* All rights reserved. * All rights reserved.
* Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/ */
#include "spdk/stdinc.h" #include "spdk/stdinc.h"
@ -353,6 +354,12 @@ bdev_blob_get_base_bdev(struct spdk_bs_dev *bs_dev)
return __get_bdev(bs_dev); return __get_bdev(bs_dev);
} }
/*
 * is_zeroes callback of the bdev-backed bs_dev: no range is ever reported
 * as zeroes-backed, so none of the arguments are inspected.
 */
static bool
bdev_blob_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
	(void)dev;
	(void)lba;
	(void)lba_count;

	return false;
}
static void static void
blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc) blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc)
{ {
@ -377,6 +384,7 @@ blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc)
b->bs_dev.write_zeroes = bdev_blob_write_zeroes; b->bs_dev.write_zeroes = bdev_blob_write_zeroes;
b->bs_dev.unmap = bdev_blob_unmap; b->bs_dev.unmap = bdev_blob_unmap;
b->bs_dev.get_base_bdev = bdev_blob_get_base_bdev; b->bs_dev.get_base_bdev = bdev_blob_get_base_bdev;
b->bs_dev.is_zeroes = bdev_blob_is_zeroes;
} }
int int