From d3c6335bc9c1fa8464f7d2d2552d923f7069d848 Mon Sep 17 00:00:00 2001 From: Tomasz Kulasek Date: Wed, 4 Jul 2018 20:45:34 +0200 Subject: [PATCH] blobstore: no copy write in thin-provisioning This patch avoids copying cluster data when there is no backing blob, to improve cluster allocation performance in thin-provisioned blobs. Change-Id: Ie766d2e5274daa74c2b13b2198a20205e3417467 Signed-off-by: Tomasz Kulasek Reviewed-on: https://review.gerrithub.io/417938 Tested-by: SPDK CI Jenkins Chandler-Test-Pool: SPDK Automated Test System Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto --- lib/blob/blobstore.c | 31 ++++++++++++++++++----------- test/unit/lib/blob/blob.c/blob_ut.c | 26 ++++++++++++++++++++++++ test/unit/lib/blob/bs_dev_common.c | 7 +++++++ 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index c86031b17..bcbd616fa 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -1649,13 +1649,15 @@ _spdk_bs_allocate_and_copy_cluster(struct spdk_blob *blob, ctx->blob = blob; ctx->page = cluster_start_page; - ctx->buf = spdk_dma_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, NULL); - if (!ctx->buf) { - SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n", - blob->bs->cluster_sz); - free(ctx); - spdk_bs_user_op_abort(op); - return; + if (blob->parent_id != SPDK_BLOBID_INVALID) { + ctx->buf = spdk_dma_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, NULL); + if (!ctx->buf) { + SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n", + blob->bs->cluster_sz); + free(ctx); + spdk_bs_user_op_abort(op); + return; + } } rc = _spdk_bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, false); @@ -1682,11 +1684,16 @@ _spdk_bs_allocate_and_copy_cluster(struct spdk_blob *blob, /* Queue the user op to block other incoming operations */ TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link); - /* Read cluster from backing device */ - 
spdk_bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf, - _spdk_bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), - _spdk_bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz), - _spdk_blob_write_copy, ctx); + if (blob->parent_id != SPDK_BLOBID_INVALID) { + /* Read cluster from backing device */ + spdk_bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf, + _spdk_bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), + _spdk_bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz), + _spdk_blob_write_copy, ctx); + } else { + _spdk_blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster, + _spdk_blob_insert_cluster_cpl, ctx); + } } static void diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index ea3fa45a3..cbf6d2c55 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -3718,8 +3718,11 @@ blob_thin_prov_rw(void) struct spdk_blob_opts opts; spdk_blob_id blobid; uint64_t free_clusters; + uint64_t page_size; uint8_t payload_read[10 * 4096]; uint8_t payload_write[10 * 4096]; + uint64_t write_bytes; + uint64_t read_bytes; dev = init_dev(); @@ -3728,6 +3731,7 @@ blob_thin_prov_rw(void) SPDK_CU_ASSERT_FATAL(g_bs != NULL); bs = g_bs; free_clusters = spdk_bs_free_cluster_count(bs); + page_size = spdk_bs_get_page_size(bs); channel = spdk_bs_alloc_io_channel(bs); CU_ASSERT(channel != NULL); @@ -3766,10 +3770,17 @@ blob_thin_prov_rw(void) CU_ASSERT(g_bserrno == 0); CU_ASSERT(memcmp(zero, payload_read, 10 * 4096) == 0); + write_bytes = g_dev_write_bytes; + read_bytes = g_dev_read_bytes; + memset(payload_write, 0xE5, sizeof(payload_write)); spdk_blob_io_write(blob, channel, payload_write, 4, 10, blob_op_complete, NULL); CU_ASSERT(g_bserrno == 0); CU_ASSERT(free_clusters != spdk_bs_free_cluster_count(bs)); + /* For thin-provisioned blob we need to write 10 pages plus one page metadata and + * read 0 bytes */ + CU_ASSERT(g_dev_write_bytes - 
write_bytes == page_size * 11); + CU_ASSERT(g_dev_read_bytes - read_bytes == 0); spdk_blob_io_read(blob, channel, payload_read, 4, 10, blob_op_complete, NULL); CU_ASSERT(g_bserrno == 0); @@ -4001,8 +4012,12 @@ blob_snapshot_rw(void) struct spdk_blob_opts opts; spdk_blob_id blobid, snapshotid; uint64_t free_clusters; + uint64_t cluster_size; + uint64_t page_size; uint8_t payload_read[10 * 4096]; uint8_t payload_write[10 * 4096]; + uint64_t write_bytes; + uint64_t read_bytes; dev = init_dev(); @@ -4011,6 +4026,8 @@ blob_snapshot_rw(void) SPDK_CU_ASSERT_FATAL(g_bs != NULL); bs = g_bs; free_clusters = spdk_bs_free_cluster_count(bs); + cluster_size = spdk_bs_get_cluster_size(bs); + page_size = spdk_bs_get_page_size(bs); channel = spdk_bs_alloc_io_channel(bs); CU_ASSERT(channel != NULL); @@ -4057,11 +4074,20 @@ blob_snapshot_rw(void) CU_ASSERT(spdk_blob_get_num_clusters(snapshot) == 5) + write_bytes = g_dev_write_bytes; + read_bytes = g_dev_read_bytes; + memset(payload_write, 0xAA, sizeof(payload_write)); spdk_blob_io_write(blob, channel, payload_write, 4, 10, blob_op_complete, NULL); CU_ASSERT(g_bserrno == 0); CU_ASSERT(free_clusters != spdk_bs_free_cluster_count(bs)); + /* For a clone we need to allocate and copy one cluster, update one page of metadata + * and then write 10 pages of payload. 
+ */ + CU_ASSERT(g_dev_write_bytes - write_bytes == page_size * 11 + cluster_size); + CU_ASSERT(g_dev_read_bytes - read_bytes == cluster_size); + spdk_blob_io_read(blob, channel, payload_read, 4, 10, blob_op_complete, NULL); CU_ASSERT(g_bserrno == 0); CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0); diff --git a/test/unit/lib/blob/bs_dev_common.c b/test/unit/lib/blob/bs_dev_common.c index eb9c0727c..fe3105269 100644 --- a/test/unit/lib/blob/bs_dev_common.c +++ b/test/unit/lib/blob/bs_dev_common.c @@ -39,6 +39,8 @@ #define DEV_BUFFER_BLOCKLEN (4096) #define DEV_BUFFER_BLOCKCNT (DEV_BUFFER_SIZE / DEV_BUFFER_BLOCKLEN) uint8_t *g_dev_buffer; +uint64_t g_dev_write_bytes; +uint64_t g_dev_read_bytes; /* Define here for UT only. */ struct spdk_io_channel g_io_channel; @@ -86,6 +88,7 @@ dev_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload length = lba_count * dev->blocklen; SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); memcpy(payload, &g_dev_buffer[offset], length); + g_dev_read_bytes += length; spdk_thread_send_msg(spdk_get_thread(), dev_complete, cb_args); } @@ -100,6 +103,7 @@ dev_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payloa length = lba_count * dev->blocklen; SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); memcpy(&g_dev_buffer[offset], payload, length); + g_dev_write_bytes += length; spdk_thread_send_msg(spdk_get_thread(), dev_complete, cb_args); } @@ -134,6 +138,7 @@ dev_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, offset += iov[i].iov_len; } + g_dev_read_bytes += length; spdk_thread_send_msg(spdk_get_thread(), dev_complete, cb_args); } @@ -156,6 +161,7 @@ dev_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, offset += iov[i].iov_len; } + g_dev_write_bytes += length; spdk_thread_send_msg(spdk_get_thread(), dev_complete, cb_args); } @@ -191,6 +197,7 @@ dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, length = 
lba_count * dev->blocklen; SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); memset(&g_dev_buffer[offset], 0, length); + g_dev_write_bytes += length; spdk_thread_send_msg(spdk_get_thread(), dev_complete, cb_args); }