raid5f: calculate and write parity
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: Ia1b82d555c966b9b291eeb2426c42846b93e7fec
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7703
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
69c448a30e
commit
c2eea87ac4
@ -11,6 +11,7 @@
|
||||
#include "spdk/util.h"
|
||||
#include "spdk/likely.h"
|
||||
#include "spdk/log.h"
|
||||
#include "spdk/xor.h"
|
||||
|
||||
/* Maximum concurrent full stripe writes per io channel */
|
||||
#define RAID5F_MAX_STRIPES 32
|
||||
@ -41,6 +42,9 @@ struct stripe_request {
|
||||
/* The stripe's parity chunk */
|
||||
struct chunk *parity_chunk;
|
||||
|
||||
/* Buffer for stripe parity */
|
||||
void *parity_buf;
|
||||
|
||||
TAILQ_ENTRY(stripe_request) link;
|
||||
|
||||
/* Array of chunks corresponding to base_bdevs */
|
||||
@ -56,11 +60,28 @@ struct raid5f_info {
|
||||
|
||||
/* Number of stripes on this array */
|
||||
uint64_t total_stripes;
|
||||
|
||||
/* Alignment for buffer allocation */
|
||||
size_t buf_alignment;
|
||||
};
|
||||
|
||||
struct raid5f_io_channel {
|
||||
/* All available stripe requests on this channel */
|
||||
TAILQ_HEAD(, stripe_request) free_stripe_requests;
|
||||
|
||||
/* Array of iovec iterators for each data chunk */
|
||||
struct iov_iter {
|
||||
struct iovec *iovs;
|
||||
int iovcnt;
|
||||
int index;
|
||||
size_t offset;
|
||||
} *chunk_iov_iters;
|
||||
|
||||
/* Array of source buffer pointers for parity calculation */
|
||||
void **chunk_xor_buffers;
|
||||
|
||||
/* Bounce buffers for parity calculation in case of unaligned source buffers */
|
||||
struct iovec *chunk_xor_bounce_buffers;
|
||||
};
|
||||
|
||||
#define __CHUNK_IN_RANGE(req, c) \
|
||||
@ -109,6 +130,87 @@ raid5f_stripe_request_release(struct stripe_request *stripe_req)
|
||||
TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
|
||||
}
|
||||
|
||||
static int
|
||||
raid5f_xor_stripe(struct stripe_request *stripe_req)
|
||||
{
|
||||
struct raid_bdev_io *raid_io = stripe_req->raid_io;
|
||||
struct raid5f_io_channel *r5ch = stripe_req->r5ch;
|
||||
struct raid_bdev *raid_bdev = raid_io->raid_bdev;
|
||||
size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
|
||||
uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
|
||||
void *dest = stripe_req->parity_buf;
|
||||
size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
|
||||
struct chunk *chunk;
|
||||
int ret;
|
||||
uint8_t c;
|
||||
|
||||
c = 0;
|
||||
FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
|
||||
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
|
||||
bool aligned = true;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < chunk->iovcnt; i++) {
|
||||
if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
|
||||
(chunk->iovs[i].iov_len & alignment_mask)) {
|
||||
aligned = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (aligned) {
|
||||
iov_iter->iovs = chunk->iovs;
|
||||
iov_iter->iovcnt = chunk->iovcnt;
|
||||
} else {
|
||||
iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
|
||||
iov_iter->iovcnt = 1;
|
||||
spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
|
||||
}
|
||||
|
||||
iov_iter->index = 0;
|
||||
iov_iter->offset = 0;
|
||||
|
||||
c++;
|
||||
}
|
||||
|
||||
while (remaining > 0) {
|
||||
size_t len = remaining;
|
||||
uint8_t i;
|
||||
|
||||
for (i = 0; i < n_src; i++) {
|
||||
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
|
||||
struct iovec *iov = &iov_iter->iovs[iov_iter->index];
|
||||
|
||||
len = spdk_min(len, iov->iov_len - iov_iter->offset);
|
||||
r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
|
||||
}
|
||||
|
||||
assert(len > 0);
|
||||
|
||||
ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
|
||||
if (spdk_unlikely(ret)) {
|
||||
SPDK_ERRLOG("stripe xor failed\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < n_src; i++) {
|
||||
struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
|
||||
struct iovec *iov = &iov_iter->iovs[iov_iter->index];
|
||||
|
||||
iov_iter->offset += len;
|
||||
if (iov_iter->offset == iov->iov_len) {
|
||||
iov_iter->offset = 0;
|
||||
iov_iter->index++;
|
||||
}
|
||||
}
|
||||
dest += len;
|
||||
|
||||
remaining -= len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
|
||||
{
|
||||
@ -165,7 +267,7 @@ raid5f_chunk_write(struct chunk *chunk)
|
||||
* these means there are no more to complete for the stripe request, we can
|
||||
* release the stripe request as well.
|
||||
*/
|
||||
uint64_t base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
|
||||
uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
|
||||
raid_io->base_bdev_io_submitted;
|
||||
|
||||
if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
|
||||
@ -238,6 +340,11 @@ raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req,
|
||||
}
|
||||
}
|
||||
|
||||
stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
|
||||
stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
|
||||
raid_bdev->blocklen_shift;
|
||||
stripe_req->parity_chunk->iovcnt = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -248,15 +355,7 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
|
||||
struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
|
||||
struct chunk *chunk;
|
||||
|
||||
if (start >= stripe_req->parity_chunk) {
|
||||
start++;
|
||||
}
|
||||
|
||||
FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
|
||||
if (chunk == stripe_req->parity_chunk) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
|
||||
break;
|
||||
}
|
||||
@ -267,7 +366,10 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
|
||||
static void
|
||||
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
|
||||
{
|
||||
/* TODO: parity */
|
||||
if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
|
||||
raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
raid5f_stripe_request_submit_chunks(stripe_req);
|
||||
}
|
||||
@ -300,7 +402,7 @@ raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
|
||||
TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
|
||||
|
||||
raid_io->module_private = stripe_req;
|
||||
raid_io->base_bdev_io_remaining = raid5f_stripe_data_chunks_num(raid_bdev);
|
||||
raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
|
||||
|
||||
raid5f_submit_stripe_request(stripe_req);
|
||||
|
||||
@ -397,6 +499,8 @@ raid5f_stripe_request_free(struct stripe_request *stripe_req)
|
||||
free(chunk->iovs);
|
||||
}
|
||||
|
||||
spdk_dma_free(stripe_req->parity_buf);
|
||||
|
||||
free(stripe_req);
|
||||
}
|
||||
|
||||
@ -421,24 +525,45 @@ raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
|
||||
chunk->iovcnt_max = 4;
|
||||
chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
|
||||
if (!chunk->iovs) {
|
||||
raid5f_stripe_request_free(stripe_req);
|
||||
return NULL;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
|
||||
r5f_info->buf_alignment, NULL);
|
||||
if (!stripe_req->parity_buf) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
return stripe_req;
|
||||
err:
|
||||
raid5f_stripe_request_free(stripe_req);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
|
||||
{
|
||||
struct raid5f_io_channel *r5ch = ctx_buf;
|
||||
struct raid5f_info *r5f_info = io_device;
|
||||
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
|
||||
struct stripe_request *stripe_req;
|
||||
int i;
|
||||
|
||||
while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
|
||||
TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
|
||||
raid5f_stripe_request_free(stripe_req);
|
||||
}
|
||||
|
||||
if (r5ch->chunk_xor_bounce_buffers) {
|
||||
for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
|
||||
free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
|
||||
}
|
||||
free(r5ch->chunk_xor_bounce_buffers);
|
||||
}
|
||||
|
||||
free(r5ch->chunk_xor_buffers);
|
||||
free(r5ch->chunk_iov_iters);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -446,6 +571,9 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
|
||||
{
|
||||
struct raid5f_io_channel *r5ch = ctx_buf;
|
||||
struct raid5f_info *r5f_info = io_device;
|
||||
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
|
||||
size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
|
||||
int status = 0;
|
||||
int i;
|
||||
|
||||
TAILQ_INIT(&r5ch->free_stripe_requests);
|
||||
@ -455,15 +583,48 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
|
||||
|
||||
stripe_req = raid5f_stripe_request_alloc(r5ch);
|
||||
if (!stripe_req) {
|
||||
SPDK_ERRLOG("Failed to initialize io channel\n");
|
||||
raid5f_ioch_destroy(r5f_info, r5ch);
|
||||
return -ENOMEM;
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
|
||||
}
|
||||
|
||||
return 0;
|
||||
r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
|
||||
sizeof(r5ch->chunk_iov_iters[0]));
|
||||
if (!r5ch->chunk_iov_iters) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
|
||||
sizeof(r5ch->chunk_xor_buffers[0]));
|
||||
if (!r5ch->chunk_xor_buffers) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
|
||||
sizeof(r5ch->chunk_xor_bounce_buffers[0]));
|
||||
if (!r5ch->chunk_xor_bounce_buffers) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
|
||||
status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
|
||||
spdk_xor_get_optimal_alignment(), chunk_len);
|
||||
if (status) {
|
||||
goto out;
|
||||
}
|
||||
r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
|
||||
}
|
||||
out:
|
||||
if (status) {
|
||||
SPDK_ERRLOG("Failed to initialize io channel\n");
|
||||
raid5f_ioch_destroy(r5f_info, r5ch);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -472,6 +633,7 @@ raid5f_start(struct raid_bdev *raid_bdev)
|
||||
uint64_t min_blockcnt = UINT64_MAX;
|
||||
struct raid_base_bdev_info *base_info;
|
||||
struct raid5f_info *r5f_info;
|
||||
size_t alignment;
|
||||
|
||||
r5f_info = calloc(1, sizeof(*r5f_info));
|
||||
if (!r5f_info) {
|
||||
@ -480,12 +642,15 @@ raid5f_start(struct raid_bdev *raid_bdev)
|
||||
}
|
||||
r5f_info->raid_bdev = raid_bdev;
|
||||
|
||||
alignment = spdk_xor_get_optimal_alignment();
|
||||
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
|
||||
min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
|
||||
alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
|
||||
}
|
||||
|
||||
r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
|
||||
r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
|
||||
r5f_info->buf_alignment = alignment;
|
||||
|
||||
raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
|
||||
raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
|
||||
|
@ -14,6 +14,7 @@
|
||||
DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
|
||||
DEFINE_STUB_V(raid_bdev_queue_io_wait, (struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
|
||||
struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn));
|
||||
DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), 0);
|
||||
|
||||
void
|
||||
raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
|
||||
@ -209,6 +210,9 @@ struct raid_io_info {
|
||||
void *src_buf;
|
||||
void *dest_buf;
|
||||
size_t buf_size;
|
||||
void *parity_buf;
|
||||
void *reference_parity;
|
||||
size_t parity_buf_size;
|
||||
enum spdk_bdev_io_status status;
|
||||
bool failed;
|
||||
int remaining;
|
||||
@ -341,21 +345,27 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
stripe_req = raid5f_chunk_stripe_req(chunk);
|
||||
test_raid_bdev_io = (struct test_raid_bdev_io *)spdk_bdev_io_from_ctx(stripe_req->raid_io);
|
||||
io_info = test_raid_bdev_io->io_info;
|
||||
raid_bdev = io_info->r5f_info->raid_bdev;
|
||||
|
||||
SPDK_CU_ASSERT_FATAL(chunk != stripe_req->parity_chunk);
|
||||
raid_bdev = io_info->r5f_info->raid_bdev;
|
||||
|
||||
stripe_idx_off = offset_blocks / raid_bdev->strip_size -
|
||||
io_info->offset_blocks / io_info->r5f_info->stripe_blocks;
|
||||
|
||||
data_chunk_idx = chunk < stripe_req->parity_chunk ? chunk->index : chunk->index - 1;
|
||||
dest_buf = test_raid_bdev_io->buf +
|
||||
(stripe_idx_off * io_info->r5f_info->stripe_blocks +
|
||||
data_chunk_idx * raid_bdev->strip_size) *
|
||||
raid_bdev->bdev.blocklen;
|
||||
if (chunk == stripe_req->parity_chunk) {
|
||||
if (io_info->parity_buf == NULL) {
|
||||
goto submit;
|
||||
}
|
||||
dest_buf = io_info->parity_buf + stripe_idx_off * raid_bdev->strip_size_kb * 1024;
|
||||
} else {
|
||||
data_chunk_idx = chunk < stripe_req->parity_chunk ? chunk->index : chunk->index - 1;
|
||||
dest_buf = test_raid_bdev_io->buf +
|
||||
(stripe_idx_off * io_info->r5f_info->stripe_blocks +
|
||||
data_chunk_idx * raid_bdev->strip_size) *
|
||||
raid_bdev->bdev.blocklen;
|
||||
}
|
||||
|
||||
memcpy(dest_buf, iov->iov_base, iov->iov_len);
|
||||
|
||||
submit:
|
||||
submit_io(test_raid_bdev_io->io_info, desc, cb, cb_arg);
|
||||
|
||||
return 0;
|
||||
@ -382,6 +392,14 @@ spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * XOR the first size bytes of b into a, in place: a[i] ^= b[i].
 * b is only read. A size of 0 leaves a untouched.
 */
static void
xor_block(uint8_t *a, const uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		a[i] ^= b[i];
	}
}
|
||||
|
||||
static void
|
||||
test_raid5f_write_request(struct raid_io_info *io_info)
|
||||
{
|
||||
@ -394,6 +412,11 @@ test_raid5f_write_request(struct raid_io_info *io_info)
|
||||
raid5f_submit_rw_request(raid_io);
|
||||
|
||||
process_io_completions(io_info);
|
||||
|
||||
if (io_info->status == SPDK_BDEV_IO_STATUS_SUCCESS && io_info->parity_buf) {
|
||||
CU_ASSERT(memcmp(io_info->parity_buf, io_info->reference_parity,
|
||||
io_info->parity_buf_size) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -424,6 +447,8 @@ deinit_io_info(struct raid_io_info *io_info)
|
||||
{
|
||||
free(io_info->src_buf);
|
||||
free(io_info->dest_buf);
|
||||
free(io_info->parity_buf);
|
||||
free(io_info->reference_parity);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -463,6 +488,35 @@ init_io_info(struct raid_io_info *io_info, struct raid5f_info *r5f_info,
|
||||
TAILQ_INIT(&io_info->bdev_io_queue);
|
||||
}
|
||||
|
||||
static void
|
||||
io_info_setup_parity(struct raid_io_info *io_info)
|
||||
{
|
||||
struct raid5f_info *r5f_info = io_info->r5f_info;
|
||||
struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
|
||||
uint32_t blocklen = raid_bdev->bdev.blocklen;
|
||||
uint64_t num_stripes = io_info->num_blocks / r5f_info->stripe_blocks;
|
||||
size_t strip_len = raid_bdev->strip_size * blocklen;
|
||||
void *src = io_info->src_buf;
|
||||
void *dest;
|
||||
unsigned i, j;
|
||||
|
||||
io_info->parity_buf_size = num_stripes * strip_len;
|
||||
io_info->parity_buf = calloc(1, io_info->parity_buf_size);
|
||||
SPDK_CU_ASSERT_FATAL(io_info->parity_buf != NULL);
|
||||
|
||||
io_info->reference_parity = calloc(1, io_info->parity_buf_size);
|
||||
SPDK_CU_ASSERT_FATAL(io_info->reference_parity != NULL);
|
||||
|
||||
dest = io_info->reference_parity;
|
||||
for (i = 0; i < num_stripes; i++) {
|
||||
for (j = 0; j < raid5f_stripe_data_chunks_num(raid_bdev); j++) {
|
||||
xor_block(dest, src, strip_len);
|
||||
src += strip_len;
|
||||
}
|
||||
dest += strip_len;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_channel *raid_ch,
|
||||
enum spdk_bdev_io_type io_type, uint64_t stripe_index, uint64_t stripe_offset_blocks,
|
||||
@ -478,6 +532,7 @@ test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_
|
||||
test_raid5f_read_request(&io_info);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
io_info_setup_parity(&io_info);
|
||||
test_raid5f_write_request(&io_info);
|
||||
break;
|
||||
default:
|
||||
|
Loading…
Reference in New Issue
Block a user