diff --git a/module/bdev/raid/Makefile b/module/bdev/raid/Makefile index fb6ef8d8b..d8114517f 100644 --- a/module/bdev/raid/Makefile +++ b/module/bdev/raid/Makefile @@ -35,7 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ -C_SRCS = bdev_raid.c bdev_raid_rpc.c +C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c LIBNAME = bdev_raid include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index 60a236987..2f428720a 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -256,31 +256,6 @@ raid_bdev_destruct(void *ctxt) return 0; } -/* - * brief: - * raid_bdev_io_completion function is called by lower layers to notify raid - * module that particular bdev_io is completed. - * params: - * bdev_io - pointer to bdev io submitted to lower layers, like child io - * success - bdev_io status - * cb_arg - function callback context, like parent io pointer - * returns: - * none - */ -static void -raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct spdk_bdev_io *parent_io = cb_arg; - - spdk_bdev_free_io(bdev_io); - - if (success) { - spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); - } else { - spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); - } -} - /* * brief: * raid_bdev_base_io_completion is the completion callback for member disk requests @@ -291,7 +266,7 @@ raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg * returns: * none */ -static void +void raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { struct spdk_bdev_io *parent_io = cb_arg; @@ -309,134 +284,6 @@ raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *c } } -static void -raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, - spdk_bdev_io_wait_cb cb_fn, int ret); - -static 
void -raid0_waitq_io_process(void *ctx); - -/* - * brief: - * raid0_submit_rw_request function is used to submit I/O to the correct - * member disk for raid0 bdevs. - * params: - * bdev_io - parent bdev io - * start_strip - start strip number of this io - * returns: - * none - */ -static void -raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) -{ - struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; - struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); - struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; - uint64_t pd_strip; - uint32_t offset_in_strip; - uint64_t pd_lba; - uint64_t pd_blocks; - uint8_t pd_idx; - int ret = 0; - - pd_strip = start_strip / raid_bdev->num_base_bdevs; - pd_idx = start_strip % raid_bdev->num_base_bdevs; - offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); - pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; - pd_blocks = bdev_io->u.bdev.num_blocks; - if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { - SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); - assert(0); - } - - /* - * Submit child io to bdev layer with using base bdev descriptors, base - * bdev lba, base bdev child io length in blocks, buffer, completion - * function and function callback context - */ - assert(raid_ch != NULL); - assert(raid_ch->base_channel); - if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { - ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, - raid_ch->base_channel[pd_idx], - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - pd_lba, pd_blocks, raid_bdev_io_completion, - bdev_io); - } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { - ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, - raid_ch->base_channel[pd_idx], - bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, - pd_lba, pd_blocks, raid_bdev_io_completion, - bdev_io); - } else { - SPDK_ERRLOG("Recvd not supported io 
type %u\n", bdev_io->type); - assert(0); - } - - if (ret != 0) { - raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret); - } -} - -/* - * brief: - * raid0_waitq_io_process function is the callback function - * registered by raid bdev module to bdev when bdev_io was unavailable - * for raid0 bdevs. - * params: - * ctx - pointer to raid_bdev_io - * returns: - * none - */ -static void -raid0_waitq_io_process(void *ctx) -{ - struct spdk_bdev_io *bdev_io = ctx; - struct raid_bdev *raid_bdev; - uint64_t start_strip; - - /* - * Try to submit childs of parent bdev io. If failed due to resource - * crunch then break the loop and don't try to process other queued IOs. - */ - raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; - start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - raid0_submit_rw_request(bdev_io, start_strip); -} - -/* - * brief: - * raid0_start_rw_request function is the submit_request function for - * read/write requests for raid0 bdevs. 
- * params: - * ch - pointer to raid bdev io channel - * bdev_io - pointer to parent bdev_io on raid bdev device - * returns: - * none - */ -static void -raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) -{ - struct raid_bdev_io *raid_io; - struct raid_bdev *raid_bdev; - uint64_t start_strip = 0; - uint64_t end_strip = 0; - - raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; - raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; - raid_io->ch = ch; - start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; - end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> - raid_bdev->strip_size_shift; - if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { - assert(false); - SPDK_ERRLOG("I/O spans strip boundary!\n"); - spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); - return; - } - raid0_submit_rw_request(bdev_io, start_strip); -} - /* * brief: * raid_bdev_queue_io_wait function processes the IO which failed to submit. 
@@ -449,7 +296,7 @@ raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) * returns: * none */ -static void +void raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, spdk_bdev_io_wait_cb cb_fn, int ret) { @@ -539,163 +386,6 @@ _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io _raid_bdev_submit_reset_request_next(bdev_io); } -static inline void -_raid0_get_io_range(struct raid_bdev_io_range *io_range, - uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, - uint64_t offset_blocks, uint64_t num_blocks) -{ - uint64_t start_strip; - uint64_t end_strip; - - io_range->strip_size = strip_size; - - /* The start and end strip index in raid0 bdev scope */ - start_strip = offset_blocks >> strip_size_shift; - end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; - io_range->start_strip_in_disk = start_strip / num_base_bdevs; - io_range->end_strip_in_disk = end_strip / num_base_bdevs; - - /* The first strip may have unaligned start LBA offset. - * The end strip may have unaligned end LBA offset. - * Strips between them certainly have aligned offset and length to boundaries. - */ - io_range->start_offset_in_strip = offset_blocks % strip_size; - io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; - - /* The base bdev indexes in which start and end strips are located */ - io_range->start_disk = start_strip % num_base_bdevs; - io_range->end_disk = end_strip % num_base_bdevs; - - /* Calculate how many base_bdevs are involved in io operation. - * Number of base bdevs involved is between 1 and num_base_bdevs. - * It will be 1 if the first strip and last strip are the same one. 
- */ - io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); -} - -static inline void -_raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, - uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) -{ - uint64_t n_strips_in_disk; - uint64_t start_offset_in_disk; - uint64_t end_offset_in_disk; - uint64_t offset_in_disk; - uint64_t nblocks_in_disk; - uint64_t start_strip_in_disk; - uint64_t end_strip_in_disk; - - start_strip_in_disk = io_range->start_strip_in_disk; - if (disk_idx < io_range->start_disk) { - start_strip_in_disk += 1; - } - - end_strip_in_disk = io_range->end_strip_in_disk; - if (disk_idx > io_range->end_disk) { - end_strip_in_disk -= 1; - } - - assert(end_strip_in_disk >= start_strip_in_disk); - n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; - - if (disk_idx == io_range->start_disk) { - start_offset_in_disk = io_range->start_offset_in_strip; - } else { - start_offset_in_disk = 0; - } - - if (disk_idx == io_range->end_disk) { - end_offset_in_disk = io_range->end_offset_in_strip; - } else { - end_offset_in_disk = io_range->strip_size - 1; - } - - offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; - nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size - + end_offset_in_disk - start_offset_in_disk + 1; - - SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, - "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", - io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); - - *_offset_in_disk = offset_in_disk; - *_nblocks_in_disk = nblocks_in_disk; -} - -/* - * brief: - * _raid_bdev_submit_null_payload_request_next function submits the next batch of - * io requests with range but without payload, like FLUSH and UNMAP, to member disks; - * it will submit as many as possible unless one base io request fails with -ENOMEM, - * in which case it will queue itself for later submission. 
- * params: - * bdev_io - pointer to parent bdev_io on raid bdev device - * returns: - * none - */ -static void -_raid_bdev_submit_null_payload_request_next(void *_bdev_io) -{ - struct spdk_bdev_io *bdev_io = _bdev_io; - struct raid_bdev_io *raid_io; - struct raid_bdev *raid_bdev; - struct raid_bdev_io_channel *raid_ch; - struct raid_bdev_io_range io_range; - int ret; - - raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; - raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; - raid_ch = spdk_io_channel_get_ctx(raid_io->ch); - - _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs, - raid_bdev->strip_size, raid_bdev->strip_size_shift, - bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); - - raid_io->base_bdev_io_expected = io_range.n_disks_involved; - - while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) { - uint8_t disk_idx; - uint64_t offset_in_disk; - uint64_t nblocks_in_disk; - - /* base_bdev is started from start_disk to end_disk. - * It is possible that index of start_disk is larger than end_disk's. 
- */ - disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; - - _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk); - - switch (bdev_io->type) { - case SPDK_BDEV_IO_TYPE_UNMAP: - ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc, - raid_ch->base_channel[disk_idx], - offset_in_disk, nblocks_in_disk, - raid_bdev_base_io_completion, bdev_io); - break; - - case SPDK_BDEV_IO_TYPE_FLUSH: - ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc, - raid_ch->base_channel[disk_idx], - offset_in_disk, nblocks_in_disk, - raid_bdev_base_io_completion, bdev_io); - break; - - default: - SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); - assert(false); - ret = -EIO; - } - - if (ret == 0) { - raid_io->base_bdev_io_submitted++; - } else { - raid_bdev_queue_io_wait(bdev_io, disk_idx, - _raid_bdev_submit_null_payload_request_next, ret); - return; - } - } -} - /* * brief: * _raid_bdev_submit_null_payload_request function is the submit_request function @@ -720,7 +410,7 @@ _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_b SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n", bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); - _raid_bdev_submit_null_payload_request_next(bdev_io); + raid0_submit_null_payload_request(bdev_io); } /* diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index b34f0d3d7..861565664 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -100,18 +100,6 @@ struct raid_bdev_io { uint8_t base_bdev_io_status; }; -/* raid0 IO range */ -struct raid_bdev_io_range { - uint64_t strip_size; - uint64_t start_strip_in_disk; - uint64_t end_strip_in_disk; - uint64_t start_offset_in_strip; - uint64_t end_offset_in_strip; - uint8_t start_disk; - uint8_t end_disk; - uint8_t n_disks_involved; -}; - /* * raid_bdev is 
the single entity structure which contains SPDK block device * and the information related to any raid bdev either configured or @@ -247,4 +235,14 @@ int raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, void raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg); struct raid_bdev_config *raid_bdev_config_find_by_name(const char *raid_name); +void +raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); +void +raid0_submit_null_payload_request(void *_bdev_io); +void +raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +void +raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, + spdk_bdev_io_wait_cb cb_fn, int ret); + #endif /* SPDK_BDEV_RAID_INTERNAL_H */ diff --git a/module/bdev/raid/raid0.c b/module/bdev/raid/raid0.c new file mode 100644 index 000000000..9e0907c52 --- /dev/null +++ b/module/bdev/raid/raid0.c @@ -0,0 +1,361 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_raid.h" + +#include "spdk/env.h" +#include "spdk/io_channel.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +/* + * brief: + * raid0_bdev_io_completion function is the completion callback for raid0 read/write + * child ios; it frees the child io and completes the parent io with the same result. + * params: + * bdev_io - pointer to the child bdev io that was submitted to a base bdev + * success - true if the child io succeeded, false otherwise + * cb_arg - callback context; used here as the parent bdev_io pointer + * returns: + * none + */ +static void +raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *parent_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (success) { + spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +raid0_waitq_io_process(void *ctx); + +/* + * brief: + * raid0_submit_rw_request function maps a read/write io onto the member disk + * that holds its strip and submits it there. 
+ * params: + * bdev_io - parent bdev io (read or write) submitted to the raid bdev + * start_strip - index of the raid-level strip in which this io starts + * returns: + * none + */ +static void +raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) +{ + struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); + struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + uint64_t pd_strip; + uint32_t offset_in_strip; + uint64_t pd_lba; + uint64_t pd_blocks; + uint8_t pd_idx; + int ret = 0; + + pd_strip = start_strip / raid_bdev->num_base_bdevs; + pd_idx = start_strip % raid_bdev->num_base_bdevs; + offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); + pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; + pd_blocks = bdev_io->u.bdev.num_blocks; + if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { + SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); + assert(0); + } + + /* + * Submit the child io to the base bdev chosen by pd_idx at pd_lba, reusing the + * iovs of the parent. NOTE(review): an unsupported io type leaves ret == 0, so with + * asserts compiled out the parent io is neither completed nor queued - confirm. + */ + assert(raid_ch != NULL); + assert(raid_ch->base_channel); + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, + raid_ch->base_channel[pd_idx], + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, raid0_bdev_io_completion, + bdev_io); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, + raid_ch->base_channel[pd_idx], + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, raid0_bdev_io_completion, + bdev_io); + } else { + SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); + assert(0); + } + + if (ret) { + raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret); + } +} + +/* + * brief: + * 
raid0_waitq_io_process function is the callback function + * that raid0_submit_rw_request passes to raid_bdev_queue_io_wait when a child + * io submission fails; it resubmits the parent io. + * params: + * ctx - pointer to the parent spdk_bdev_io to resubmit + * returns: + * none + */ +static void +raid0_waitq_io_process(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + struct raid_bdev *raid_bdev; + uint64_t start_strip; + + /* + * Recompute the start strip from the parent io and submit it again; a + * repeated submission failure is handled inside raid0_submit_rw_request. + */ + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + raid0_submit_rw_request(bdev_io, start_strip); +} + +/* + * brief: + * raid0_start_rw_request function is the entry point for raid0 read/write requests; + * it fails an io that spans strips (when num_base_bdevs > 1), otherwise submits it. + * params: + * ch - pointer to raid bdev io channel; stored in raid_io->ch + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +void +raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct raid_bdev_io *raid_io; + struct raid_bdev *raid_bdev; + uint64_t start_strip = 0; + uint64_t end_strip = 0; + + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_io->ch = ch; + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { + assert(false); + SPDK_ERRLOG("I/O spans strip boundary!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + raid0_submit_rw_request(bdev_io, start_strip); +} + +/* raid0 IO range */ +struct raid_bdev_io_range { + uint64_t strip_size; + uint64_t start_strip_in_disk; + uint64_t end_strip_in_disk; + uint64_t start_offset_in_strip; + uint64_t end_offset_in_strip; + uint8_t start_disk; + 
uint8_t end_disk; + uint8_t n_disks_involved; +}; + +static inline void +_raid0_get_io_range(struct raid_bdev_io_range *io_range, + uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, + uint64_t offset_blocks, uint64_t num_blocks) +{ + uint64_t start_strip; + uint64_t end_strip; + + io_range->strip_size = strip_size; + + /* The start and end strip index in raid0 bdev scope */ + start_strip = offset_blocks >> strip_size_shift; + end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; + io_range->start_strip_in_disk = start_strip / num_base_bdevs; + io_range->end_strip_in_disk = end_strip / num_base_bdevs; + + /* The first strip may start at an unaligned LBA offset. + * The last strip may end at an unaligned LBA offset (end offsets are inclusive). + * Strips between them are always aligned to strip boundaries in offset and length. + */ + io_range->start_offset_in_strip = offset_blocks % strip_size; + io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; + + /* The base bdev indexes in which start and end strips are located */ + io_range->start_disk = start_strip % num_base_bdevs; + io_range->end_disk = end_strip % num_base_bdevs; + + /* Calculate how many base_bdevs are involved in io operation. + * Number of base bdevs involved is between 1 and num_base_bdevs. + * It will be 1 if the first strip and last strip are the same one. 
+ */ + io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); +} + +static inline void +_raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, + uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) +{ + uint64_t n_strips_in_disk; + uint64_t start_offset_in_disk; + uint64_t end_offset_in_disk; + uint64_t offset_in_disk; + uint64_t nblocks_in_disk; + uint64_t start_strip_in_disk; + uint64_t end_strip_in_disk; + + start_strip_in_disk = io_range->start_strip_in_disk; + if (disk_idx < io_range->start_disk) { + start_strip_in_disk += 1; + } + + end_strip_in_disk = io_range->end_strip_in_disk; + if (disk_idx > io_range->end_disk) { + end_strip_in_disk -= 1; + } + + assert(end_strip_in_disk >= start_strip_in_disk); + n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; + + if (disk_idx == io_range->start_disk) { + start_offset_in_disk = io_range->start_offset_in_strip; + } else { + start_offset_in_disk = 0; + } + + if (disk_idx == io_range->end_disk) { + end_offset_in_disk = io_range->end_offset_in_strip; + } else { + end_offset_in_disk = io_range->strip_size - 1; + } + + offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; + nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size + + end_offset_in_disk - start_offset_in_disk + 1; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID0, + "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", + io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); + + *_offset_in_disk = offset_in_disk; + *_nblocks_in_disk = nblocks_in_disk; +} + +/* + * brief: + * raid0_submit_null_payload_request function submits the next batch of + * io requests with range but without payload, like FLUSH and UNMAP, to member disks; + * it will submit as many as possible unless one base io request fails to submit, + * in which case it passes itself and the error to raid_bdev_queue_io_wait. 
+ * params: + * _bdev_io - pointer to parent bdev_io on raid bdev device, passed as void * + * returns: + * none + */ +void +raid0_submit_null_payload_request(void *_bdev_io) +{ + struct spdk_bdev_io *bdev_io = _bdev_io; + struct raid_bdev_io *raid_io; + struct raid_bdev *raid_bdev; + struct raid_bdev_io_channel *raid_ch; + struct raid_bdev_io_range io_range; + int ret; + + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_ch = spdk_io_channel_get_ctx(raid_io->ch); + + _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs, + raid_bdev->strip_size, raid_bdev->strip_size_shift, + bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); + + raid_io->base_bdev_io_expected = io_range.n_disks_involved; + + while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) { + uint8_t disk_idx; + uint64_t offset_in_disk; + uint64_t nblocks_in_disk; + + /* Base bdevs are visited in order starting from start_disk, wrapping modulo + * num_base_bdevs, so the index of start_disk may be larger than that of end_disk. 
+ */ + disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; + + _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk); + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_UNMAP: + ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc, + raid_ch->base_channel[disk_idx], + offset_in_disk, nblocks_in_disk, + raid_bdev_base_io_completion, bdev_io); + break; + + case SPDK_BDEV_IO_TYPE_FLUSH: + ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc, + raid_ch->base_channel[disk_idx], + offset_in_disk, nblocks_in_disk, + raid_bdev_base_io_completion, bdev_io); + break; + + default: + SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); + assert(false); + ret = -EIO; + } + + if (ret == 0) { + raid_io->base_bdev_io_submitted++; + } else { + raid_bdev_queue_io_wait(bdev_io, disk_idx, + raid0_submit_null_payload_request, ret); + return; + } + } +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_raid0", SPDK_LOG_BDEV_RAID0) diff --git a/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c b/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c index 3d040fe81..36ae2b4c6 100644 --- a/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c +++ b/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c @@ -37,6 +37,7 @@ #include "spdk_internal/mock.h" #include "bdev/raid/bdev_raid.c" #include "bdev/raid/bdev_raid_rpc.c" +#include "bdev/raid/raid0.c" #define MAX_BASE_DRIVES 32 #define MAX_RAIDS 2