raid5f: calculate and write parity

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: Ia1b82d555c966b9b291eeb2426c42846b93e7fec
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7703
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
Artur Paszkiewicz 2022-07-28 14:52:20 +02:00 committed by Tomasz Zawadzki
parent 69c448a30e
commit c2eea87ac4
2 changed files with 245 additions and 25 deletions

View File

@ -11,6 +11,7 @@
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"
/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32
@ -41,6 +42,9 @@ struct stripe_request {
/* The stripe's parity chunk */
struct chunk *parity_chunk;
/* Buffer for stripe parity */
void *parity_buf;
TAILQ_ENTRY(stripe_request) link;
/* Array of chunks corresponding to base_bdevs */
@ -56,11 +60,28 @@ struct raid5f_info {
/* Number of stripes on this array */
uint64_t total_stripes;
/* Alignment for buffer allocation */
size_t buf_alignment;
};
/* Per-channel state for the raid5f module, including scratch memory used
 * during parity calculation so no allocation happens on the I/O path. */
struct raid5f_io_channel {
/* All available stripe requests on this channel */
TAILQ_HEAD(, stripe_request) free_stripe_requests;

/* Array of iovec iterators for each data chunk */
struct iov_iter {
/* iovec array currently being walked (chunk's own iovs or a bounce buffer) */
struct iovec *iovs;
/* Number of entries in iovs */
int iovcnt;
/* Index of the iovec currently being consumed */
int index;
/* Byte offset into the current iovec */
size_t offset;
} *chunk_iov_iters;

/* Array of source buffer pointers for parity calculation */
void **chunk_xor_buffers;

/* Bounce buffers for parity calculation in case of unaligned source buffers */
struct iovec *chunk_xor_bounce_buffers;
};
#define __CHUNK_IN_RANGE(req, c) \
@ -109,6 +130,87 @@ raid5f_stripe_request_release(struct stripe_request *stripe_req)
TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}
/*
 * Compute the parity chunk of a full-stripe write by XOR-ing all data chunks
 * into stripe_req->parity_buf.
 *
 * Data chunks whose iovecs do not meet spdk_xor_get_optimal_alignment()
 * (either base address or length) are first copied into the per-channel
 * bounce buffers, so spdk_xor_gen() always operates on aligned memory.
 * The XOR is then performed in segments: each pass covers the largest
 * length that is contiguous in every source chunk's current iovec.
 *
 * Returns 0 on success, or the error code from spdk_xor_gen() on failure.
 */
static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
struct raid_bdev_io *raid_io = stripe_req->raid_io;
struct raid5f_io_channel *r5ch = stripe_req->r5ch;
struct raid_bdev *raid_bdev = raid_io->raid_bdev;
/* Chunk size in bytes: strip_size is in blocks, blocklen_shift converts to bytes */
size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
void *dest = stripe_req->parity_buf;
size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
struct chunk *chunk;
int ret;
uint8_t c;

/* Set up one iovec iterator per data chunk */
c = 0;
FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
bool aligned = true;
int i;

/* A single unaligned iovec forces the whole chunk through the bounce buffer */
for (i = 0; i < chunk->iovcnt; i++) {
if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
(chunk->iovs[i].iov_len & alignment_mask)) {
aligned = false;
break;
}
}

if (aligned) {
iov_iter->iovs = chunk->iovs;
iov_iter->iovcnt = chunk->iovcnt;
} else {
/* Copy the chunk's data into the aligned single-iovec bounce buffer */
iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
iov_iter->iovcnt = 1;
spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
}

iov_iter->index = 0;
iov_iter->offset = 0;

c++;
}

while (remaining > 0) {
size_t len = remaining;
uint8_t i;

/* Shrink len to the largest span contiguous in every source, and
 * collect the per-source pointers for this pass. */
for (i = 0; i < n_src; i++) {
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
struct iovec *iov = &iov_iter->iovs[iov_iter->index];

len = spdk_min(len, iov->iov_len - iov_iter->offset);
r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
}

assert(len > 0);

ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
if (spdk_unlikely(ret)) {
SPDK_ERRLOG("stripe xor failed\n");
return ret;
}

/* Advance each iterator past the bytes consumed this pass */
for (i = 0; i < n_src; i++) {
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
struct iovec *iov = &iov_iter->iovs[iov_iter->index];

iov_iter->offset += len;
if (iov_iter->offset == iov->iov_len) {
iov_iter->offset = 0;
iov_iter->index++;
}
}

dest += len;
remaining -= len;
}

return 0;
}
static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
@ -165,7 +267,7 @@ raid5f_chunk_write(struct chunk *chunk)
* this means there are no more to complete for the stripe request, we can
* release the stripe request as well.
*/
uint64_t base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
raid_io->base_bdev_io_submitted;
if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
@ -238,6 +340,11 @@ raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req,
}
}
stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
raid_bdev->blocklen_shift;
stripe_req->parity_chunk->iovcnt = 1;
return 0;
}
@ -248,15 +355,7 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
struct chunk *chunk;
if (start >= stripe_req->parity_chunk) {
start++;
}
FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
if (chunk == stripe_req->parity_chunk) {
continue;
}
if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
break;
}
@ -267,7 +366,10 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
/*
 * Calculate the stripe's parity, then submit writes for all chunks.
 * If the parity calculation fails, the whole raid I/O is failed
 * immediately and nothing is submitted.
 */
static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
return;
}

raid5f_stripe_request_submit_chunks(stripe_req);
}
@ -300,7 +402,7 @@ raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
raid_io->module_private = stripe_req;
raid_io->base_bdev_io_remaining = raid5f_stripe_data_chunks_num(raid_bdev);
raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
raid5f_submit_stripe_request(stripe_req);
@ -397,6 +499,8 @@ raid5f_stripe_request_free(struct stripe_request *stripe_req)
free(chunk->iovs);
}
spdk_dma_free(stripe_req->parity_buf);
free(stripe_req);
}
@ -421,24 +525,45 @@ raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
chunk->iovcnt_max = 4;
chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
if (!chunk->iovs) {
raid5f_stripe_request_free(stripe_req);
return NULL;
goto err;
}
}
stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
r5f_info->buf_alignment, NULL);
if (!stripe_req->parity_buf) {
goto err;
}
return stripe_req;
err:
raid5f_stripe_request_free(stripe_req);
return NULL;
}
/*
 * I/O channel destroy callback. Releases all free stripe requests and the
 * per-channel parity scratch memory allocated by raid5f_ioch_create().
 * Also called on a partially initialized channel when creation fails, so
 * every pointer it touches may still be NULL.
 */
static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
struct raid5f_io_channel *r5ch = ctx_buf;
struct raid5f_info *r5f_info = io_device;
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
struct stripe_request *stripe_req;
int i;

while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
raid5f_stripe_request_free(stripe_req);
}

if (r5ch->chunk_xor_bounce_buffers) {
for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
/* iov_base may be NULL if posix_memalign failed mid-loop; free(NULL) is a no-op */
free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
}
free(r5ch->chunk_xor_bounce_buffers);
}

free(r5ch->chunk_xor_buffers);
free(r5ch->chunk_iov_iters);
}
static int
@ -446,6 +571,9 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
{
struct raid5f_io_channel *r5ch = ctx_buf;
struct raid5f_info *r5f_info = io_device;
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
int status = 0;
int i;
TAILQ_INIT(&r5ch->free_stripe_requests);
@ -455,15 +583,48 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
stripe_req = raid5f_stripe_request_alloc(r5ch);
if (!stripe_req) {
SPDK_ERRLOG("Failed to initialize io channel\n");
raid5f_ioch_destroy(r5f_info, r5ch);
return -ENOMEM;
status = -ENOMEM;
goto out;
}
TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
}
return 0;
r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
sizeof(r5ch->chunk_iov_iters[0]));
if (!r5ch->chunk_iov_iters) {
status = -ENOMEM;
goto out;
}
r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
sizeof(r5ch->chunk_xor_buffers[0]));
if (!r5ch->chunk_xor_buffers) {
status = -ENOMEM;
goto out;
}
r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
sizeof(r5ch->chunk_xor_bounce_buffers[0]));
if (!r5ch->chunk_xor_bounce_buffers) {
status = -ENOMEM;
goto out;
}
for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
spdk_xor_get_optimal_alignment(), chunk_len);
if (status) {
goto out;
}
r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
}
out:
if (status) {
SPDK_ERRLOG("Failed to initialize io channel\n");
raid5f_ioch_destroy(r5f_info, r5ch);
}
return status;
}
static int
@ -472,6 +633,7 @@ raid5f_start(struct raid_bdev *raid_bdev)
uint64_t min_blockcnt = UINT64_MAX;
struct raid_base_bdev_info *base_info;
struct raid5f_info *r5f_info;
size_t alignment;
r5f_info = calloc(1, sizeof(*r5f_info));
if (!r5f_info) {
@ -480,12 +642,15 @@ raid5f_start(struct raid_bdev *raid_bdev)
}
r5f_info->raid_bdev = raid_bdev;
alignment = spdk_xor_get_optimal_alignment();
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
}
r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
r5f_info->buf_alignment = alignment;
raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;

View File

@ -14,6 +14,7 @@
DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
DEFINE_STUB_V(raid_bdev_queue_io_wait, (struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn));
DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), 0);
void
raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
@ -209,6 +210,9 @@ struct raid_io_info {
void *src_buf;
void *dest_buf;
size_t buf_size;
void *parity_buf;
void *reference_parity;
size_t parity_buf_size;
enum spdk_bdev_io_status status;
bool failed;
int remaining;
@ -341,21 +345,27 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
stripe_req = raid5f_chunk_stripe_req(chunk);
test_raid_bdev_io = (struct test_raid_bdev_io *)spdk_bdev_io_from_ctx(stripe_req->raid_io);
io_info = test_raid_bdev_io->io_info;
raid_bdev = io_info->r5f_info->raid_bdev;
SPDK_CU_ASSERT_FATAL(chunk != stripe_req->parity_chunk);
raid_bdev = io_info->r5f_info->raid_bdev;
stripe_idx_off = offset_blocks / raid_bdev->strip_size -
io_info->offset_blocks / io_info->r5f_info->stripe_blocks;
data_chunk_idx = chunk < stripe_req->parity_chunk ? chunk->index : chunk->index - 1;
dest_buf = test_raid_bdev_io->buf +
(stripe_idx_off * io_info->r5f_info->stripe_blocks +
data_chunk_idx * raid_bdev->strip_size) *
raid_bdev->bdev.blocklen;
if (chunk == stripe_req->parity_chunk) {
if (io_info->parity_buf == NULL) {
goto submit;
}
dest_buf = io_info->parity_buf + stripe_idx_off * raid_bdev->strip_size_kb * 1024;
} else {
data_chunk_idx = chunk < stripe_req->parity_chunk ? chunk->index : chunk->index - 1;
dest_buf = test_raid_bdev_io->buf +
(stripe_idx_off * io_info->r5f_info->stripe_blocks +
data_chunk_idx * raid_bdev->strip_size) *
raid_bdev->bdev.blocklen;
}
memcpy(dest_buf, iov->iov_base, iov->iov_len);
submit:
submit_io(test_raid_bdev_io->io_info, desc, cb, cb_arg);
return 0;
@ -382,6 +392,14 @@ spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
return 0;
}
/* XOR the contents of buffer b into buffer a, byte by byte. */
static void
xor_block(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		a[i] ^= b[i];
	}
}
static void
test_raid5f_write_request(struct raid_io_info *io_info)
{
@ -394,6 +412,11 @@ test_raid5f_write_request(struct raid_io_info *io_info)
raid5f_submit_rw_request(raid_io);
process_io_completions(io_info);
if (io_info->status == SPDK_BDEV_IO_STATUS_SUCCESS && io_info->parity_buf) {
CU_ASSERT(memcmp(io_info->parity_buf, io_info->reference_parity,
io_info->parity_buf_size) == 0);
}
}
static void
@ -424,6 +447,8 @@ deinit_io_info(struct raid_io_info *io_info)
{
free(io_info->src_buf);
free(io_info->dest_buf);
free(io_info->parity_buf);
free(io_info->reference_parity);
}
static void
@ -463,6 +488,35 @@ init_io_info(struct raid_io_info *io_info, struct raid5f_info *r5f_info,
TAILQ_INIT(&io_info->bdev_io_queue);
}
/*
 * Allocate the buffer that captures the parity actually written by the
 * module (parity_buf) and compute the expected parity (reference_parity)
 * by XOR-ing the data strips of each full stripe in src_buf.
 * The two are compared after the write request completes.
 *
 * NOTE(review): arithmetic on void * relies on the GCC/Clang extension
 * treating sizeof(void) as 1 -- consistent with SPDK's toolchain settings.
 */
static void
io_info_setup_parity(struct raid_io_info *io_info)
{
struct raid5f_info *r5f_info = io_info->r5f_info;
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
uint32_t blocklen = raid_bdev->bdev.blocklen;
/* Only full stripes are written by raid5f, so this divides evenly */
uint64_t num_stripes = io_info->num_blocks / r5f_info->stripe_blocks;
size_t strip_len = raid_bdev->strip_size * blocklen;
void *src = io_info->src_buf;
void *dest;
unsigned i, j;

/* One parity strip per stripe; calloc zero-fills so XOR accumulates correctly */
io_info->parity_buf_size = num_stripes * strip_len;
io_info->parity_buf = calloc(1, io_info->parity_buf_size);
SPDK_CU_ASSERT_FATAL(io_info->parity_buf != NULL);

io_info->reference_parity = calloc(1, io_info->parity_buf_size);
SPDK_CU_ASSERT_FATAL(io_info->reference_parity != NULL);

dest = io_info->reference_parity;
for (i = 0; i < num_stripes; i++) {
for (j = 0; j < raid5f_stripe_data_chunks_num(raid_bdev); j++) {
xor_block(dest, src, strip_len);
src += strip_len;
}
dest += strip_len;
}
}
static void
test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_channel *raid_ch,
enum spdk_bdev_io_type io_type, uint64_t stripe_index, uint64_t stripe_offset_blocks,
@ -478,6 +532,7 @@ test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_
test_raid5f_read_request(&io_info);
break;
case SPDK_BDEV_IO_TYPE_WRITE:
io_info_setup_parity(&io_info);
test_raid5f_write_request(&io_info);
break;
default: