From db3d1201a4c11ccbe46c8d07a4e565b37275c259 Mon Sep 17 00:00:00 2001
From: Sochin Jiang
Date: Tue, 14 Jul 2020 00:12:42 +0800
Subject: [PATCH] lib/blob: fix a data corruption bug

There is a fatal bug that can easily cause data corruption when using
thin-provisioned blobs. In blob_request_submit_rw_iov(), we first get
the lba by calling blob_calculate_lba_and_lba_count(), which calculates
a different lba depending on the return value of
bs_io_unit_is_allocated(). Later, we call bs_io_unit_is_allocated()
again to judge whether the specific cluster is allocated; the problem
is that the cluster may be allocated by then, even though it was not
allocated when blob_calculate_lba_and_lba_count() was called. To ensure
the lba is correct, we can either recalculate the lba when
bs_io_unit_is_allocated() returns true, or make
blob_calculate_lba_and_lba_count() return the result of
bs_io_unit_is_allocated(); this patch uses the second solution.

With more than one CPU core configured, the md thread runs in a
separate SPDK thread, and this data corruption scenario can easily be
reproduced by running fio verify in VMs that use thin-provisioned
Lvols as block devices.

Signed-off-by: Sochin Jiang
Change-Id: I099865ff291ea42d5d49b693cc53f64b60881684
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3318
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Tomasz Zawadzki
---
 lib/blob/blobstore.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c
index 768fc5b45..a906cb1e0 100644
--- a/lib/blob/blobstore.c
+++ b/lib/blob/blobstore.c
@@ -2354,7 +2354,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
 	}
 }
 
-static inline void
+static inline bool
 blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
 				 uint64_t *lba, uint32_t *lba_count)
 {
@@ -2364,8 +2364,10 @@ blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint6
 		assert(blob->back_bs_dev != NULL);
 		*lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
 		*lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
+		return false;
 	} else {
 		*lba = bs_blob_io_unit_to_lba(blob, io_unit);
+		return true;
 	}
 }
 
@@ -2480,6 +2482,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
 	struct spdk_bs_cpl cpl;
 	uint64_t lba;
 	uint32_t lba_count;
+	bool is_allocated;
 
 	assert(blob != NULL);
 
@@ -2487,7 +2490,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
 	cpl.u.blob_basic.cb_fn = cb_fn;
 	cpl.u.blob_basic.cb_arg = cb_arg;
 
-	blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
+	is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
 
 	if (blob->frozen_refcnt) {
 		/* This blob I/O is frozen */
@@ -2515,7 +2518,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
 			return;
 		}
 
-		if (bs_io_unit_is_allocated(blob, offset)) {
+		if (is_allocated) {
 			/* Read from the blob */
 			bs_batch_read_dev(batch, payload, lba, lba_count);
 		} else {
@@ -2528,7 +2531,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
 	}
 	case SPDK_BLOB_WRITE:
 	case SPDK_BLOB_WRITE_ZEROES: {
-		if (bs_io_unit_is_allocated(blob, offset)) {
+		if (is_allocated) {
 			/* Write to the blob */
 			spdk_bs_batch_t *batch;
 
@@ -2573,7 +2576,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
 			return;
 		}
 
-		if (bs_io_unit_is_allocated(blob, offset)) {
+		if (is_allocated) {
 			bs_batch_unmap_dev(batch, lba, lba_count);
 		}
 
@@ -2745,6 +2748,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
 	if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
 		uint32_t lba_count;
 		uint64_t lba;
+		bool is_allocated;
 
 		cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
 		cpl.u.blob_basic.cb_fn = cb_fn;
@@ -2768,7 +2772,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
 			return;
 		}
 
-		blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
+		is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
 
 		if (read) {
 			spdk_bs_sequence_t *seq;
@@ -2779,14 +2783,14 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
 				return;
 			}
 
-			if (bs_io_unit_is_allocated(blob, offset)) {
+			if (is_allocated) {
 				bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
 			} else {
 				bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
 							 rw_iov_done, NULL);
 			}
 		} else {
-			if (bs_io_unit_is_allocated(blob, offset)) {
+			if (is_allocated) {
 				spdk_bs_sequence_t *seq;
 
 				seq = bs_sequence_start(_channel, &cpl);
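
For illustration only, not part of the patch: a minimal, self-contained C sketch of the pattern the fix applies, namely computing the LBA and the allocation state in a single call and branching on that returned snapshot instead of re-checking allocation afterwards. All names here (struct blob, calculate_lba, submit_read, the lba fields) are hypothetical stand-ins, not SPDK's actual types or functions.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the blobstore state and translation helpers. */
struct blob {
	bool cluster_allocated;      /* may be flipped concurrently by the md thread */
	uint64_t cluster_base_lba;   /* base LBA on the blobstore device */
	uint64_t back_dev_base_lba;  /* base LBA on the backing device */
};

/*
 * Translate an I/O unit to an LBA and report which device that LBA is valid
 * for.  Returning the allocation state keeps the caller's later branch
 * consistent with the LBA that was just computed, even if the cluster gets
 * allocated in between.
 */
static bool
calculate_lba(const struct blob *blob, uint64_t io_unit, uint64_t *lba)
{
	if (!blob->cluster_allocated) {
		*lba = blob->back_dev_base_lba + io_unit;
		return false;   /* LBA is valid on the backing device */
	}
	*lba = blob->cluster_base_lba + io_unit;
	return true;            /* LBA is valid on the blobstore device */
}

static void
submit_read(const struct blob *blob, uint64_t io_unit)
{
	uint64_t lba;
	bool is_allocated = calculate_lba(blob, io_unit, &lba);

	/*
	 * Branch on the snapshot taken above, not on a second allocation check:
	 * a second check could see a newly allocated cluster and send the
	 * backing-device LBA to the wrong device.
	 */
	if (is_allocated) {
		printf("read blobstore dev, lba=%" PRIu64 "\n", lba);
	} else {
		printf("read backing dev, lba=%" PRIu64 "\n", lba);
	}
}

int
main(void)
{
	struct blob b = {
		.cluster_allocated = false,
		.cluster_base_lba = 1000,
		.back_dev_base_lba = 0,
	};

	submit_read(&b, 8);          /* routes to the backing device */
	b.cluster_allocated = true;  /* cluster allocation happens later */
	submit_read(&b, 8);          /* routes to the blobstore device */
	return 0;
}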