/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/accel.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;

	/* Shallow copy of IO request parameters */
	struct spdk_bdev_ext_io_opts ext_opts;
};

struct stripe_request {
	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	/* Buffer for stripe parity */
	void *parity_buf;

	/* Buffer for stripe io metadata parity */
	void *parity_md_buf;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		struct iovec *iovs;
		int iovcnt;
		int index;
		size_t offset;
	} *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		void *dest;
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

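/*
 * Per-channel context. Each io channel preallocates RAID5F_MAX_STRIPES
 * stripe requests, so the write hot path never allocates; when the free
 * list is empty, the write completes with SPDK_BDEV_IO_STATUS_NOMEM and
 * the upper layer retries it.
 */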
struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	TAILQ_HEAD(, stripe_request) free_stripe_requests;

	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;
};

#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

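/*
 * raid5f_chunk_stripe_req() below recovers the parent stripe_request from a
 * chunk pointer: chunks[] is a flexible array at the end of stripe_request,
 * so stepping back chunk->index elements yields &chunks[0], from which
 * SPDK_CONTAINEROF() computes the address of the enclosing structure.
 */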
static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

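/*
 * The parity chunk position rotates across stripes so that parity writes are
 * spread over all base bdevs. For example (illustrative geometry), with
 * num_base_bdevs = 4 (3 data chunks + 1 parity), stripes 0..4 place parity
 * on chunks 3, 2, 1, 0, 3, ...
 */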
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);
static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
		raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}

	if (!TAILQ_EMPTY(&stripe_req->r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&stripe_req->r5ch->xor_retry_queue);
		TAILQ_REMOVE(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	if (status != 0) {
		stripe_req->xor.status = status;
	}

	if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_done(stripe_req);
	}
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;
	size_t len = stripe_req->xor.len;

	stripe_req->xor.remaining -= len;

	if (stripe_req->xor.remaining > 0) {
		struct raid_bdev_io *raid_io = stripe_req->raid_io;
		struct raid_bdev *raid_bdev = raid_io->raid_bdev;
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint8_t i;

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

			iov_iter->offset += len;
			if (iov_iter->offset == iov->iov_len) {
				iov_iter->offset = 0;
				iov_iter->index++;
			}
		}

		stripe_req->xor.dest += len;

		raid5f_xor_stripe_continue(stripe_req);
	}

	_raid5f_xor_stripe_cb(stripe_req, status);
}

static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining_md = 0;

	_raid5f_xor_stripe_cb(stripe_req, status);
}

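/*
 * raid5f_xor_stripe_continue() submits one XOR segment to the accel
 * framework. A segment is the longest run that is contiguous in every data
 * chunk's current iovec, so a stripe scattered across many iovecs becomes a
 * chain of accel ops: each completion (raid5f_xor_stripe_cb above) advances
 * the per-chunk iterators and resubmits until xor.remaining reaches zero.
 */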
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	size_t len = stripe_req->xor.remaining;
	uint8_t i;
	int ret;

	assert(stripe_req->xor.remaining > 0);

	for (i = 0; i < n_src; i++) {
		struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i];
		struct iovec *iov = &iov_iter->iovs[iov_iter->index];

		len = spdk_min(len, iov->iov_len - iov_iter->offset);
		stripe_req->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
	}

	assert(len > 0);
	stripe_req->xor.len = len;

	ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->xor.dest,
				    stripe_req->chunk_xor_buffers, n_src, len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
		return;
	}
}

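/*
 * raid5f_xor_stripe() starts parity calculation for a full stripe write: it
 * seeds the per-chunk iovec iterators, then kicks off the data XOR chain
 * and, when the bdev carries separate metadata, a single XOR op over the
 * per-chunk metadata buffers. Chunk writes are only submitted once both
 * complete, tracked via xor.remaining and xor.remaining_md.
 */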
static void
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	uint8_t c;

	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[c++];

		iov_iter->iovs = chunk->iovs;
		iov_iter->iovcnt = chunk->iovcnt;
		iov_iter->index = 0;
		iov_iter->offset = 0;
	}

	stripe_req->xor.dest = stripe_req->parity_buf;
	stripe_req->xor.remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	stripe_req->xor.status = 0;

	if (raid_md != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = raid_bdev->strip_size * raid_md_size;
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
			stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
			c++;
		}

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->parity_md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	raid5f_xor_stripe_continue(stripe_req);
}

static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md) {
		raid5f_xor_stripe(stripe_req);
	} else {
		raid5f_xor_stripe_continue(stripe_req);
	}
}

static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);

	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
				    SPDK_BDEV_IO_STATUS_FAILED);
}

static void
raid5f_chunk_write_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

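/*
 * Rebuild the ext I/O options passed to the base bdevs from the parent
 * bdev_io. Note: memory_domain and memory_domain_ctx are taken directly
 * from the parent request; the metadata pointer defaults to the parent's
 * md_buf and is overridden per chunk by the write path.
 */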
static inline void
raid5f_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = bdev_io->u.bdev.memory_domain;
	opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
	opts->metadata = bdev_io->u.bdev.md_buf;
}

static int
raid5f_chunk_write(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index];
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	int ret;

	raid5f_init_ext_io_opts(bdev_io, &chunk->ext_opts);
	chunk->ext_opts.metadata = chunk->md_buf;

	ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
					  base_offset_blocks, raid_bdev->strip_size, raid5f_chunk_write_complete_bdev_io,
					  chunk, &chunk->ext_opts);

	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						raid5f_chunk_write_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							      raid_io->base_bdev_io_submitted;

			if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

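/*
 * Split the parent write's iovecs among the data chunks. Each data chunk
 * covers strip_size blocks of the payload, so a chunk may reference a
 * sub-range of one parent iovec or span several of them; per-chunk iovec
 * arrays are grown on demand. As a purely illustrative example, a write
 * described by two parent iovecs landing on a 3-data-chunk stripe can map
 * to chunk iovcnt values of 1, 2 and 1 depending on where the parent iovec
 * boundaries fall relative to chunk boundaries.
 */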
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io);
	const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs;
	int raid_io_iovcnt = bdev_io->u.bdev.iovcnt;
	void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		size_t off = raid_io_iov_offset;

		for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io_iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt);

		if (chunk_iovcnt > chunk->iovcnt_max) {
			struct iovec *iovs = chunk->iovs;

			iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs));
			if (!iovs) {
				return -ENOMEM;
			}
			chunk->iovs = iovs;
			chunk->iovcnt_max = chunk_iovcnt;
		}
		chunk->iovcnt = chunk_iovcnt;

		if (raid_io_md) {
			chunk->md_buf = raid_io_md +
					(raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
			raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->md_buf = stripe_req->parity_md_buf;
	stripe_req->parity_chunk->iovcnt = 1;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
			break;
		}
		raid_io->base_bdev_io_submitted++;
	}
}

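/*
 * Write path entry point. raid5f handles only full stripe writes (the bdev
 * layer splits requests on write_unit_size, which raid5f_start() sets to
 * stripe_blocks), so each request maps onto exactly one stripe: take a
 * preallocated stripe request, map the iovecs, compute parity, then write
 * all num_base_bdevs chunks.
 */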
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests);
	if (!stripe_req) {
		return -ENOMEM;
	}

	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev,
			stripe_req->stripe_index);
	stripe_req->raid_io = raid_io;

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_xor_stripe(stripe_req);

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

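/*
 * Read path: reads never touch parity and are forwarded to the single base
 * bdev holding the data. The chunk index skips over the stripe's parity
 * chunk, e.g. with parity on chunk 1, data chunks 0, 1, 2 map to base
 * bdevs 0, 2, 3.
 */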
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(bdev_io, &io_opts);
	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, bdev_io->u.bdev.iovs,
					 bdev_io->u.bdev.iovcnt,
					 base_offset_blocks, bdev_io->u.bdev.num_blocks, raid5f_chunk_read_complete, raid_io,
					 &io_opts);

	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

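/*
 * Common submit entry registered with the raid framework. By the time a
 * request arrives here the bdev layer has already split it: reads fit
 * within a single strip (optimal_io_boundary) and writes cover exactly one
 * full stripe (write_unit_size), which the asserts below rely on.
 */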
static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
	uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	spdk_dma_free(stripe_req->parity_buf);
	spdk_dma_free(stripe_req->parity_md_buf);

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

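/*
 * Allocate one stripe request with its trailing chunk array, parity
 * buffer(s) and XOR bookkeeping arrays. Parity buffers are DMA-allocated
 * with the strictest buffer alignment of all base bdevs, since they are
 * written directly to the parity chunk's base bdev.
 */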
static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct stripe_request *stripe_req;
	struct chunk *chunk;

	stripe_req = calloc(1, sizeof(*stripe_req) +
			    sizeof(struct chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
			r5f_info->buf_alignment, NULL);
	if (!stripe_req->parity_buf) {
		goto err;
	}

	if (raid_io_md_size != 0) {
		stripe_req->parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
				r5f_info->buf_alignment, NULL);
		if (!stripe_req->parity_md_buf) {
			goto err;
		}
	}

	stripe_req->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					     sizeof(stripe_req->chunk_iov_iters[0]));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct stripe_request *stripe_req;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}
}

static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	int status = 0;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		struct stripe_request *stripe_req;

		stripe_req = raid5f_stripe_request_alloc(r5ch);
		if (!stripe_req) {
			status = -ENOMEM;
			goto out;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
	}

	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		/* Set status here so the failure is actually reported to the caller. */
		status = -ENOMEM;
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto out;
	}

	TAILQ_INIT(&r5ch->xor_retry_queue);
out:
	if (status) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
		raid5f_ioch_destroy(r5f_info, r5ch);
	}
	return status;
}

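/*
 * Illustrative geometry: 4 base bdevs of 1024 usable blocks each with
 * strip_size = 128 gives total_stripes = 8 and stripe_blocks = 3 * 128 =
 * 384, so the raid bdev exposes 8 * 384 = 3072 blocks.
 */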
static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t base_bdev_data_size;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
	}

	base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}

	r5f_info->total_stripes = base_bdev_data_size / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

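/*
 * Module descriptor. The {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1} constraint
 * keeps the array operational with at most one base bdev missing, which is
 * why raid5f_stripe_data_chunks_num() above equals
 * min_base_bdevs_operational, i.e. num_base_bdevs - 1.
 */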
static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)