spdk/module/bdev/raid/concat.c
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2022 Intel Corporation.
* Copyright (c) Peng Yu yupeng0921@gmail.com.
* All rights reserved.
*/
#include "bdev_raid.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/log.h"
struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io is completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

/* Wrapper with a void *ctx argument so the RW path can be re-queued via
 * raid_bdev_queue_io_wait() on -ENOMEM.
 */
static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int i;

	/* Find the member disk that holds offset_blocks: the last entry whose
	 * start does not lie beyond the offset.
	 */
	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
	pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit child io to the bdev layer using the base bdev descriptor, base
	 * bdev lba, base bdev child io length in blocks, buffer, completion
	 * function and function callback context
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	base_ch = raid_ch->base_channel[pd_idx];
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch,
						 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						 pd_lba, pd_blocks, concat_bdev_io_completion,
						 raid_io, bdev_io->u.bdev.ext_opts);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch,
						  bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						  pd_lba, pd_blocks, concat_bdev_io_completion,
						  raid_io, bdev_io->u.bdev.ext_opts);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM; it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

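/*
 * Completion callback for the per-member child IOs issued by
 * concat_submit_null_payload_request(). raid_bdev_io_complete_part()
 * accounts one finished child IO against base_bdev_io_remaining and
 * completes the parent raid IO, as failed if any child failed, once all
 * child IOs have finished.
 */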
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * I/O requests that carry a range but no payload, like FLUSH and UNMAP,
 * to the member disks; it will submit as many as possible unless one base
 * io request fails with -ENOMEM, in which case it queues itself for later
 * submission.
 * params:
 * raid_io - pointer to the parent raid_bdev_io on the raid bdev device
 * returns:
 * none
 */
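/*
 * Resubmission bookkeeping: base_bdev_io_remaining is set to the number of
 * member disks the request spans, and base_bdev_io_submitted counts the
 * child IOs already issued. When a child submission returns -ENOMEM the
 * function queues itself and returns; on retry it skips the first
 * base_bdev_io_submitted members and continues where it left off.
 */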
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs and find the first and the last member disk
	 * touched by this request.
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be in the middle of the first bdev.
			 * Except for the first bdev, offset_blocks should always be
			 * at the start of the bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM; it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/* Each member contributes only whole strips, so round its block
		 * count down to a multiple of the strip size.
		 */
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}
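
/*
 * Example with hypothetical sizes: with strip_size_shift = 7 (128-block
 * strips) and two base bdevs of 1000 and 2048 blocks, the first contributes
 * 1000 >> 7 = 7 strips (896 blocks) and the second 16 strips (2048 blocks),
 * giving block_range[] = { {0, 896}, {896, 2048} } and a concat bdev of
 * 2944 blocks.
 */
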
static bool
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);

	return true;
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};

RAID_MODULE_REGISTER(&g_concat_module)
SPDK_LOG_REGISTER_COMPONENT(bdev_concat)
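
/*
 * Illustrative usage (an assumption, not part of this file): a concat bdev
 * is typically created through the bdev_raid_create RPC with the "concat"
 * raid level, e.g. with placeholder bdev names:
 *
 *   scripts/rpc.py bdev_raid_create -n Concat0 -z 64 -r concat -b "Nvme0n1 Nvme1n1"
 *
 * which exposes Nvme0n1 and Nvme1n1 back to back, each trimmed down to a
 * whole number of 64 KiB strips.
 */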