lib/reduce: queue overlapping I/O to same logical chunk
Write operations are not in-place: once a write completes, its old chunk and backing io units are freed. Reads still in progress against those backing io units, issued by other I/O to the same chunk, could therefore return corrupted data. Simultaneous writes to the same chunk can cause similar issues. There are cases where we can relax this in the future - for example, overlapping reads to the same chunk pose no problems. We'll leave that as a future exercise.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I3a4d9e8e3ab920f823084cd1bae91f179a4386ff
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/448333
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent: dfb60590c9
commit: 4fb2260117
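For context, here is a minimal, self-contained model of the overlap-queueing pattern this commit introduces (check the executing list for another request on the same logical chunk; either start the request or park it on a queued list; on completion, promote the first queued request for that chunk). This is an illustrative sketch only - the request struct, list names, and helpers below are simplified stand-ins, not the real spdk_reduce_vol structures; the actual change is in the diff that follows.

/* Illustrative model of the overlap-queueing pattern - not the SPDK code itself. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/queue.h>

struct request {
	uint64_t		logical_map_index;	/* which logical chunk this I/O targets */
	TAILQ_ENTRY(request)	tailq;
};

TAILQ_HEAD(req_list, request);
static struct req_list g_executing = TAILQ_HEAD_INITIALIZER(g_executing);
static struct req_list g_queued = TAILQ_HEAD_INITIALIZER(g_queued);

/* Mirrors the idea of _check_overlap(): is any in-flight request using this chunk? */
static bool
check_overlap(uint64_t logical_map_index)
{
	struct request *req;

	TAILQ_FOREACH(req, &g_executing, tailq) {
		if (req->logical_map_index == logical_map_index) {
			return true;
		}
	}
	return false;
}

/* Submission path: start immediately, or park the request until the chunk is free. */
static void
submit(struct request *req)
{
	if (!check_overlap(req->logical_map_index)) {
		TAILQ_INSERT_TAIL(&g_executing, req, tailq);	/* starts now */
	} else {
		TAILQ_INSERT_TAIL(&g_queued, req, tailq);	/* deferred */
	}
}

/* Completion path: release the chunk and kick the first queued request for it. */
static void
complete(struct request *req)
{
	struct request *next_req;

	TAILQ_REMOVE(&g_executing, req, tailq);
	TAILQ_FOREACH(next_req, &g_queued, tailq) {
		if (next_req->logical_map_index == req->logical_map_index) {
			TAILQ_REMOVE(&g_queued, next_req, tailq);
			TAILQ_INSERT_TAIL(&g_executing, next_req, tailq);
			break;
		}
	}
}

int
main(void)
{
	struct request a = { .logical_map_index = 7 };
	struct request b = { .logical_map_index = 7 };

	submit(&a);		/* chunk 7 is free -> executes */
	submit(&b);		/* overlaps with a -> queued */
	assert(TAILQ_FIRST(&g_queued) == &b);
	complete(&a);		/* b is promoted to executing */
	assert(TAILQ_FIRST(&g_executing) == &b);
	printf("overlapped request deferred and restarted as expected\n");
	return 0;
}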
lib/reduce/reduce.c

@@ -78,6 +78,9 @@ struct spdk_reduce_pm_file {
 	uint64_t			size;
 };
 
+#define REDUCE_IO_READV		1
+#define REDUCE_IO_WRITEV	2
+
 struct spdk_reduce_vol_request {
 	/**
 	 *  Scratch buffer used for read/modify/write operations on
@@ -88,6 +91,7 @@ struct spdk_reduce_vol_request {
 	struct iovec			*buf_iov;
 	struct iovec			*iov;
 	struct spdk_reduce_vol		*vol;
+	int				type;
 	int				reduce_errno;
 	int				iovcnt;
 	int				num_backing_ops;
@@ -119,12 +123,17 @@ struct spdk_reduce_vol {
 
 	struct spdk_reduce_vol_request		*request_mem;
 	TAILQ_HEAD(, spdk_reduce_vol_request)	free_requests;
+	TAILQ_HEAD(, spdk_reduce_vol_request)	executing_requests;
+	TAILQ_HEAD(, spdk_reduce_vol_request)	queued_requests;
 
 	/* Single contiguous buffer used for all request buffers for this volume. */
 	uint8_t					*reqbufspace;
 	struct iovec				*buf_iov_mem;
 };
 
+static void _start_readv_request(struct spdk_reduce_vol_request *req);
+static void _start_writev_request(struct spdk_reduce_vol_request *req);
+
 /*
  * Allocate extra metadata chunks and corresponding backing io units to account for
  * outstanding IO in worst case scenario where logical map is completely allocated
@@ -449,6 +458,10 @@ spdk_reduce_vol_init(struct spdk_reduce_vol_params *params,
 		return;
 	}
 
+	TAILQ_INIT(&vol->free_requests);
+	TAILQ_INIT(&vol->executing_requests);
+	TAILQ_INIT(&vol->queued_requests);
+
 	vol->backing_super = spdk_dma_zmalloc(sizeof(*vol->backing_super), 0, NULL);
 	if (vol->backing_super == NULL) {
 		cb_fn(cb_arg, NULL, -ENOMEM);
@@ -654,6 +667,10 @@ spdk_reduce_vol_load(struct spdk_reduce_backing_dev *backing_dev,
 		return;
 	}
 
+	TAILQ_INIT(&vol->free_requests);
+	TAILQ_INIT(&vol->executing_requests);
+	TAILQ_INIT(&vol->queued_requests);
+
 	vol->backing_super = spdk_dma_zmalloc(sizeof(*vol->backing_super), 64, NULL);
 	if (vol->backing_super == NULL) {
 		_init_load_cleanup(vol, NULL);
@@ -812,8 +829,26 @@ typedef void (*reduce_request_fn)(void *_req, int reduce_errno);
 static void
 _reduce_vol_complete_req(struct spdk_reduce_vol_request *req, int reduce_errno)
 {
+	struct spdk_reduce_vol_request *next_req;
+	struct spdk_reduce_vol *vol = req->vol;
+
 	req->cb_fn(req->cb_arg, reduce_errno);
-	TAILQ_INSERT_HEAD(&req->vol->free_requests, req, tailq);
+	TAILQ_REMOVE(&vol->executing_requests, req, tailq);
+
+	TAILQ_FOREACH(next_req, &vol->queued_requests, tailq) {
+		if (next_req->logical_map_index == req->logical_map_index) {
+			TAILQ_REMOVE(&vol->queued_requests, next_req, tailq);
+			if (next_req->type == REDUCE_IO_READV) {
+				_start_readv_request(next_req);
+			} else {
+				assert(next_req->type == REDUCE_IO_WRITEV);
+				_start_writev_request(next_req);
+			}
+			break;
+		}
+	}
+
+	TAILQ_INSERT_HEAD(&vol->free_requests, req, tailq);
 }
 
 static void
@@ -1010,9 +1045,24 @@ _iov_array_is_valid(struct spdk_reduce_vol *vol, struct iovec *iov, int iovcnt,
 	return size == (length * vol->params.logical_block_size);
 }
 
+static bool
+_check_overlap(struct spdk_reduce_vol *vol, uint64_t logical_map_index)
+{
+	struct spdk_reduce_vol_request *req;
+
+	TAILQ_FOREACH(req, &vol->executing_requests, tailq) {
+		if (logical_map_index == req->logical_map_index) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static void
 _start_readv_request(struct spdk_reduce_vol_request *req)
 {
+	TAILQ_INSERT_TAIL(&req->vol->executing_requests, req, tailq);
 	_reduce_vol_read_chunk(req, _read_read_done);
 }
 
@@ -1023,6 +1073,7 @@ spdk_reduce_vol_readv(struct spdk_reduce_vol *vol,
 {
 	struct spdk_reduce_vol_request *req;
 	uint64_t logical_map_index;
+	bool overlapped;
 	int i;
 
 	if (length == 0) {
@@ -1041,7 +1092,9 @@ spdk_reduce_vol_readv(struct spdk_reduce_vol *vol,
 	}
 
 	logical_map_index = offset / vol->logical_blocks_per_chunk;
-	if (vol->pm_logical_map[logical_map_index] == REDUCE_EMPTY_MAP_ENTRY) {
+	overlapped = _check_overlap(vol, logical_map_index);
+
+	if (!overlapped && vol->pm_logical_map[logical_map_index] == REDUCE_EMPTY_MAP_ENTRY) {
 		/*
 		 * This chunk hasn't been allocated.  So treat the data as all
 		 * zeroes for this chunk - do the memset and immediately complete
@@ -1061,16 +1114,21 @@ spdk_reduce_vol_readv(struct spdk_reduce_vol *vol,
 	}
 
 	TAILQ_REMOVE(&vol->free_requests, req, tailq);
+	req->type = REDUCE_IO_READV;
 	req->vol = vol;
 	req->iov = iov;
 	req->iovcnt = iovcnt;
 	req->offset = offset;
-	req->logical_map_index = offset / vol->logical_blocks_per_chunk;
+	req->logical_map_index = logical_map_index;
 	req->length = length;
 	req->cb_fn = cb_fn;
 	req->cb_arg = cb_arg;
 
-	_start_readv_request(req);
+	if (!overlapped) {
+		_start_readv_request(req);
+	} else {
+		TAILQ_INSERT_TAIL(&vol->queued_requests, req, tailq);
+	}
 }
 
 static void
@@ -1082,6 +1140,7 @@ _start_writev_request(struct spdk_reduce_vol_request *req)
 	int i;
 	uint8_t *buf;
 
+	TAILQ_INSERT_TAIL(&req->vol->executing_requests, req, tailq);
 	if (vol->pm_logical_map[req->logical_map_index] != REDUCE_EMPTY_MAP_ENTRY) {
 		/* Read old chunk, then overwrite with data from this write operation.
 		 * TODO: bypass reading old chunk if this write operation overwrites
@@ -1117,6 +1176,8 @@ spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
 		      spdk_reduce_vol_op_complete cb_fn, void *cb_arg)
 {
 	struct spdk_reduce_vol_request *req;
+	uint64_t logical_map_index;
+	bool overlapped;
 
 	if (length == 0) {
 		cb_fn(cb_arg, 0);
@@ -1133,6 +1194,9 @@ spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
 		return;
 	}
 
+	logical_map_index = offset / vol->logical_blocks_per_chunk;
+	overlapped = _check_overlap(vol, logical_map_index);
+
 	req = TAILQ_FIRST(&vol->free_requests);
 	if (req == NULL) {
 		cb_fn(cb_arg, -ENOMEM);
@@ -1140,6 +1204,7 @@ spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
 	}
 
 	TAILQ_REMOVE(&vol->free_requests, req, tailq);
+	req->type = REDUCE_IO_WRITEV;
 	req->vol = vol;
 	req->iov = iov;
 	req->iovcnt = iovcnt;
@@ -1149,7 +1214,11 @@ spdk_reduce_vol_writev(struct spdk_reduce_vol *vol,
 	req->cb_fn = cb_fn;
 	req->cb_arg = cb_arg;
 
-	_start_writev_request(req);
+	if (!overlapped) {
+		_start_writev_request(req);
+	} else {
+		TAILQ_INSERT_TAIL(&vol->queued_requests, req, tailq);
+	}
 }
 
 SPDK_LOG_REGISTER_COMPONENT("reduce", SPDK_LOG_REDUCE)
test/unit/lib/reduce/reduce.c/reduce_ut.c

@@ -961,6 +961,72 @@ defer_bdev_io(void)
 	backing_dev_destroy(&backing_dev);
 }
 
+static void
+overlapped(void)
+{
+	struct spdk_reduce_vol_params params = {};
+	struct spdk_reduce_backing_dev backing_dev = {};
+	const uint32_t logical_block_size = 512;
+	struct iovec iov;
+	char buf[2 * logical_block_size];
+	char compare_buf[2 * logical_block_size];
+
+	params.chunk_size = 16 * 1024;
+	params.backing_io_unit_size = 4096;
+	params.logical_block_size = logical_block_size;
+	spdk_uuid_generate(&params.uuid);
+
+	backing_dev_init(&backing_dev, &params, 512);
+
+	g_vol = NULL;
+	g_reduce_errno = -1;
+	spdk_reduce_vol_init(&params, &backing_dev, TEST_MD_PATH, init_cb, NULL);
+	CU_ASSERT(g_reduce_errno == 0);
+	SPDK_CU_ASSERT_FATAL(g_vol != NULL);
+
+	/* Write 0xAA to 1 512-byte logical block. */
+	memset(buf, 0xAA, logical_block_size);
+	iov.iov_base = buf;
+	iov.iov_len = logical_block_size;
+	g_reduce_errno = -100;
+	g_defer_bdev_io = true;
+	spdk_reduce_vol_writev(g_vol, &iov, 1, 0, 1, write_cb, NULL);
+	/* Callback should not have executed, so this should still equal -100. */
+	CU_ASSERT(g_reduce_errno == -100);
+	CU_ASSERT(!TAILQ_EMPTY(&g_pending_bdev_io));
+	CU_ASSERT(g_pending_bdev_io_count == params.chunk_size / params.backing_io_unit_size);
+
+	/* Now do an overlapped I/O to the same chunk. */
+	spdk_reduce_vol_writev(g_vol, &iov, 1, 1, 1, write_cb, NULL);
+	/* Callback should not have executed, so this should still equal -100. */
+	CU_ASSERT(g_reduce_errno == -100);
+	CU_ASSERT(!TAILQ_EMPTY(&g_pending_bdev_io));
+	/* The second I/O overlaps with the first one.  So we should only see pending bdev_io
+	 * related to the first I/O here - the second one won't start until the first one is completed.
+	 */
+	CU_ASSERT(g_pending_bdev_io_count == params.chunk_size / params.backing_io_unit_size);
+
+	backing_dev_io_execute(0);
+	CU_ASSERT(g_reduce_errno == 0);
+
+	g_defer_bdev_io = false;
+	memset(compare_buf, 0xAA, sizeof(compare_buf));
+	memset(buf, 0xFF, sizeof(buf));
+	iov.iov_base = buf;
+	iov.iov_len = 2 * logical_block_size;
+	g_reduce_errno = -100;
+	spdk_reduce_vol_readv(g_vol, &iov, 1, 0, 2, read_cb, NULL);
+	CU_ASSERT(g_reduce_errno == 0);
+	CU_ASSERT(memcmp(buf, compare_buf, 2 * logical_block_size) == 0);
+
+	g_reduce_errno = -1;
+	spdk_reduce_vol_unload(g_vol, unload_cb, NULL);
+	CU_ASSERT(g_reduce_errno == 0);
+
+	persistent_pm_buf_destroy();
+	backing_dev_destroy(&backing_dev);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -987,7 +1053,8 @@ main(int argc, char **argv)
 		CU_add_test(suite, "write_maps", write_maps) == NULL ||
 		CU_add_test(suite, "read_write", read_write) == NULL ||
 		CU_add_test(suite, "destroy", destroy) == NULL ||
-		CU_add_test(suite, "defer_bdev_io", defer_bdev_io) == NULL
+		CU_add_test(suite, "defer_bdev_io", defer_bdev_io) == NULL ||
+		CU_add_test(suite, "overlapped", overlapped) == NULL
 	) {
 		CU_cleanup_registry();
 		return CU_get_error();