/* module/bdev/longhorn/bdev_longhorn_io.c */
/*-
* BSD LICENSE
*
* Copyright (c) SUSE
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of SUSE nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bdev_longhorn.h"
#include "bdev_longhorn_impl.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/log.h"
/*
* brief:
* longhorn_bdev_io_completion function is called by lower layers to notify longhorn
* module that particular bdev_io is completed.
* params:
* bdev_io - pointer to bdev io submitted to lower layers, like child io
* success - bdev_io status
* cb_arg - function callback context (parent longhorn_bdev_io)
* returns:
* none
*/
static void
longhorn_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct longhorn_bdev_io *longhorn_io = cb_arg;
spdk_bdev_free_io(bdev_io);
if (success) {
//SPDK_ERRLOG("io op success\n");
longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_SUCCESS);
} else {
//SPDK_ERRLOG("io op failure\n");
longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
static void
longhorn_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct longhorn_bdev_io *longhorn_io = cb_arg;
longhorn_bdev_io_complete_part(longhorn_io, 1, success ?
SPDK_BDEV_IO_STATUS_SUCCESS :
SPDK_BDEV_IO_STATUS_FAILED);
spdk_bdev_free_io(bdev_io);
}
/* spdk_bdev_io_wait_cb trampoline that retries a queued read/write request. */
static void
_longhorn_submit_rw_request(void *_longhorn_io)
{
	longhorn_submit_rw_request((struct longhorn_bdev_io *)_longhorn_io);
}
static struct longhorn_base_io_channel *
_longhorn_get_next_readable_base_channel(struct longhorn_bdev_io_channel *longhorn_ch) {
/* Get next channel */
if (longhorn_ch->last_read_io_ch) {
longhorn_ch->last_read_io_ch = TAILQ_NEXT(longhorn_ch->last_read_io_ch, channels);
}
/* Search for a RW replica */
while (longhorn_ch->last_read_io_ch &&
longhorn_ch->last_read_io_ch->base_info->state != LONGHORN_BASE_BDEV_RW) {
longhorn_ch->last_read_io_ch = TAILQ_NEXT(longhorn_ch->last_read_io_ch, channels);
}
/* We found a RW replica before the end of the list */
if (longhorn_ch->last_read_io_ch) {
return longhorn_ch->last_read_io_ch;
}
/* Search for a RW replica from the beginning of the list */
longhorn_ch->last_read_io_ch = TAILQ_FIRST(&longhorn_ch->base_channels);
while (longhorn_ch->last_read_io_ch &&
longhorn_ch->last_read_io_ch->base_info->state != LONGHORN_BASE_BDEV_RW) {
longhorn_ch->last_read_io_ch = TAILQ_NEXT(longhorn_ch->last_read_io_ch, channels);
}
return longhorn_ch->last_read_io_ch;
}
/*
 * brief:
 * Submit a READ to one base replica, chosen round-robin among the replicas
 * currently in RW state.
 * params:
 * longhorn_io - parent longhorn I/O
 * returns:
 * none (completes the parent I/O on error paths)
 */
static void
longhorn_submit_read_request(struct longhorn_bdev_io *longhorn_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io);
	struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch;
	struct longhorn_base_io_channel *base_channel;
	struct longhorn_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int ret;

	assert(longhorn_ch != NULL);

	base_channel = _longhorn_get_next_readable_base_channel(longhorn_ch);
	if (base_channel == NULL) {
		SPDK_ERRLOG("bdev io submit with no base devices, it should not happen\n");
		/* Fail the I/O instead of dropping it: with no readable replica
		 * nobody would ever complete it, hanging the upper layer. */
		longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	base_ch = base_channel->base_channel;
	base_info = base_channel->base_info;

	ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
				     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
				     longhorn_bdev_io_completion, longhorn_io);
	if (ret == -ENOMEM) {
		/* No child bdev_io available right now: retry the whole request
		 * once the base bdev has resources again. */
		longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch,
					    _longhorn_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
		assert(false);
		longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}
/*
 * brief:
 * Fan a WRITE out to every base replica channel.  The parent I/O completes
 * once base_bdev_io_remaining child completions have arrived
 * (see longhorn_base_io_complete).
 * params:
 * longhorn_io - parent longhorn I/O
 * returns:
 * none
 */
static void
longhorn_submit_write_request(struct longhorn_bdev_io *longhorn_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io);
	struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch;
	struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev;
	struct longhorn_base_io_channel *base_channel;
	struct longhorn_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int ret;

	assert(longhorn_ch != NULL);

	if (longhorn_io->base_bdev_io_remaining == 0) {
		longhorn_io->base_bdev_io_remaining = longhorn_ch->num_channels;
	}

	TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) {
		base_ch = base_channel->base_channel;
		base_info = base_channel->base_info;

		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      bdev_io->u.bdev.offset_blocks,
					      bdev_io->u.bdev.num_blocks,
					      longhorn_base_io_complete, longhorn_io);
		if (ret == -ENOMEM) {
			SPDK_ERRLOG("enqueuing bdev io submit due to ENOMEM\n");
			/* Stop submitting: the request is requeued as a whole and
			 * will be resubmitted from the top.
			 * NOTE(review): resubmission restarts the TAILQ_FOREACH, so
			 * replicas that already received this write get it again -
			 * confirm duplicate writes are acceptable here or track
			 * per-channel progress. */
			longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch,
						    _longhorn_submit_rw_request);
			return;
		} else if (ret != 0) {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			/* Fail the parent and stop the loop; continuing after the
			 * parent is completed risks completing it twice. */
			longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}

		/* Only count I/O that was actually accepted by the base bdev. */
		atomic_fetch_add(&longhorn_bdev->io_ops, 1);
		atomic_fetch_add(&longhorn_ch->io_ops, 1);
		longhorn_io->submitted = true;
	}
}
/*
 * brief:
 * Route a read or write longhorn I/O to the matching submission helper.
 * params:
 * longhorn_io - parent longhorn I/O
 * returns:
 * none
 */
void
longhorn_submit_rw_request(struct longhorn_bdev_io *longhorn_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		longhorn_submit_read_request(longhorn_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		longhorn_submit_write_request(longhorn_io);
		break;
	default:
		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
		assert(0);
		break;
	}
}
/* spdk_bdev_io_wait_cb trampoline that retries a queued FLUSH/UNMAP request. */
static void
_longhorn_submit_null_payload_request(void *_longhorn_io)
{
	longhorn_submit_null_payload_request((struct longhorn_bdev_io *)_longhorn_io);
}
/*
 * brief:
 * longhorn_submit_null_payload_request submits io requests that carry a
 * range but no payload (FLUSH and UNMAP) to every member disk.  The parent
 * I/O completes after base_bdev_io_remaining child completions
 * (see longhorn_base_io_complete).  If a base submission fails with -ENOMEM
 * the whole request is queued for later resubmission.
 * params:
 * longhorn_io - parent longhorn I/O on the longhorn bdev device
 * returns:
 * none
 */
void
longhorn_submit_null_payload_request(struct longhorn_bdev_io *longhorn_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io);
	struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch;
	struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev;
	struct longhorn_base_io_channel *base_channel;
	struct longhorn_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int ret = 0;

	assert(longhorn_ch != NULL);

	if (longhorn_io->base_bdev_io_remaining == 0) {
		/* NOTE(review): the write path initializes this from
		 * longhorn_ch->num_channels; confirm num_base_bdevs always
		 * matches the per-channel count. */
		longhorn_io->base_bdev_io_remaining = longhorn_bdev->num_base_bdevs;
	}

	TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) {
		base_ch = base_channel->base_channel;
		base_info = base_channel->base_info;

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     bdev_io->u.bdev.offset_blocks,
						     bdev_io->u.bdev.num_blocks,
						     longhorn_base_io_complete, longhorn_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     bdev_io->u.bdev.offset_blocks,
						     bdev_io->u.bdev.num_blocks,
						     longhorn_base_io_complete, longhorn_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n",
				    bdev_io->type);
			assert(false);
			ret = -EIO;
		}

		if (ret == -ENOMEM) {
			/* Stop submitting; the request is requeued as a whole. */
			longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch,
						    _longhorn_submit_null_payload_request);
			return;
		} else if (ret != 0) {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			/* Fail the parent and stop the loop; continuing after the
			 * parent is completed risks completing it twice. */
			longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}
/*
 * brief:
 * Validate the base bdevs of a longhorn bdev and publish the block count.
 * All replicas must share the same block count and block length.
 * params:
 * longhorn_bdev - longhorn bdev to start
 * returns:
 * 0 on success, -EINVAL when the base bdevs disagree on geometry
 */
int longhorn_start(struct longhorn_bdev *longhorn_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t min_blocklen = UINT64_MAX;
	struct longhorn_base_bdev_info *base_info;

	TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) {
		/* Calculate minimum block count and length from all base bdevs */
		min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
		min_blocklen = spdk_min(min_blocklen, base_info->bdev->blocklen);
	}

	/* Every base bdev must match the minimum, i.e. they must all be equal. */
	TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) {
		if (base_info->bdev->blockcnt != min_blockcnt) {
			SPDK_ERRLOG("Not all base bdevs of the longhorn bdev have the same block count\n");
			return -EINVAL;
		}
		if (base_info->bdev->blocklen != min_blocklen) {
			SPDK_ERRLOG("Not all base bdevs of the longhorn bdev have the same block length\n");
			return -EINVAL;
		}
	}

	longhorn_bdev->bdev.blockcnt = min_blockcnt;
	/* NOTE(review): bdev.blocklen is not set here - confirm it is
	 * initialized elsewhere before the bdev is registered. */

	if (longhorn_bdev->num_base_bdevs > 1) {
		longhorn_bdev->bdev.split_on_optimal_io_boundary = true;
	} else {
		/* Do not need to split reads/writes on single bdev RAID modules. */
		longhorn_bdev->bdev.split_on_optimal_io_boundary = false;
	}

	return 0;
}
static void
longhorn_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
struct longhorn_bdev_io *longhorn_io = cb_arg;
spdk_bdev_free_io(bdev_io);
longhorn_bdev_io_complete_part(longhorn_io, 1, success ?
SPDK_BDEV_IO_STATUS_SUCCESS :
SPDK_BDEV_IO_STATUS_FAILED);
}
static void
longhorn_bdev_submit_reset_request(struct longhorn_bdev_io *longhorn_io);
/* spdk_bdev_io_wait_cb trampoline that retries a queued RESET request. */
static void
_longhorn_bdev_submit_reset_request(void *_longhorn_io)
{
	longhorn_bdev_submit_reset_request((struct longhorn_bdev_io *)_longhorn_io);
}
/*
 * brief:
 * Submit a RESET to every base replica channel.  Submission stops on the
 * first -ENOMEM (the whole request is queued for retry) or on a hard error
 * (the parent I/O is failed).
 * params:
 * longhorn_io - parent longhorn I/O
 * returns:
 * none
 */
static void
longhorn_bdev_submit_reset_request(struct longhorn_bdev_io *longhorn_io)
{
	struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch;
	struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev;
	struct longhorn_base_io_channel *base_channel;
	int rc;

	if (longhorn_io->base_bdev_io_remaining == 0) {
		longhorn_io->base_bdev_io_remaining = longhorn_bdev->num_base_bdevs;
	}

	TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) {
		struct longhorn_base_bdev_info *info = base_channel->base_info;
		struct spdk_io_channel *ch = base_channel->base_channel;

		rc = spdk_bdev_reset(info->desc, ch,
				     longhorn_base_bdev_reset_complete, longhorn_io);
		if (rc == -ENOMEM) {
			longhorn_bdev_queue_io_wait(longhorn_io, info->bdev, ch,
						    _longhorn_bdev_submit_reset_request);
			return;
		}
		if (rc != 0) {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
		longhorn_io->base_bdev_io_submitted++;
	}
}
/*
 * brief:
 * Callback for spdk_bdev_io_get_buf: once the data buffer is available,
 * submit the read/write; otherwise fail the parent I/O.
 * params:
 * ch - longhorn bdev io channel
 * bdev_io - parent bdev_io on the longhorn bdev device
 * success - true if the buffer was allocated
 * returns:
 * none
 */
static void
longhorn_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
			 bool success)
{
	struct longhorn_bdev_io *longhorn_io = (struct longhorn_bdev_io *)bdev_io->driver_ctx;

	if (success) {
		longhorn_submit_rw_request(longhorn_io);
	} else {
		longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}
/*
 * brief:
 * Dispatch a longhorn I/O to the proper submission path based on its type.
 * Used both for fresh submissions and for pause-queue playback.
 * params:
 * longhorn_io - longhorn I/O to dispatch
 * returns:
 * none
 */
static void
_longhorn_bdev_submit_request(struct longhorn_bdev_io *longhorn_io)
{
	struct spdk_bdev_io *io = spdk_bdev_io_from_ctx(longhorn_io);

	switch (io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Reads may arrive without a data buffer; acquire one first. */
		spdk_bdev_io_get_buf(io, longhorn_bdev_get_buf_cb,
				     io->u.bdev.num_blocks * io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		longhorn_submit_rw_request(longhorn_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		longhorn_bdev_submit_reset_request(longhorn_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
		longhorn_submit_null_payload_request(longhorn_io);
		break;
	default:
		SPDK_ERRLOG("submit request, invalid io type %u\n", io->type);
		longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
/*
 * brief:
 * Park a longhorn I/O on the channel's pause queue; it is replayed by
 * longhorn_pause_queue_playback() once the channel is resumed.
 * params:
 * longhorn_io - longhorn I/O to queue
 * returns:
 * none
 */
void
longhorn_pause_queue_io_wait(struct longhorn_bdev_io *longhorn_io)
{
	longhorn_io->waitq_entry.cb_arg = longhorn_io;
	longhorn_io->longhorn_ch->queue_len++;
	/* Routine queueing is not an error; the per-I/O SPDK_ERRLOG that used
	 * to be here flooded the error log on every paused submission. */
	TAILQ_INSERT_TAIL(&longhorn_io->longhorn_ch->io_wait_queue,
			  &longhorn_io->waitq_entry, link);
}
/*
 * brief:
 * Replay every longhorn I/O parked on the channel's pause queue, in FIFO
 * order, by resubmitting it through _longhorn_bdev_submit_request().
 * params:
 * longhorn_ch - longhorn bdev io channel being resumed
 * returns:
 * none
 */
void
longhorn_pause_queue_playback(struct longhorn_bdev_io_channel *longhorn_ch)
{
	struct spdk_bdev_io_wait_entry *entry;

	while ((entry = TAILQ_FIRST(&longhorn_ch->io_wait_queue)) != NULL) {
		TAILQ_REMOVE(&longhorn_ch->io_wait_queue, entry, link);
		/* Keep queue_len accurate before resubmitting; the original
		 * per-I/O "removing to queue" SPDK_ERRLOG (a typo, and not an
		 * error) has been dropped. */
		longhorn_ch->queue_len--;
		_longhorn_bdev_submit_request(entry->cb_arg);
	}
}
/*
 * brief:
 * submit_request entry point of the longhorn bdev function table.  Fills in
 * the per-I/O context and either parks the I/O (channel paused) or submits
 * it immediately.
 * params:
 * ch - longhorn bdev io channel
 * bdev_io - parent bdev_io on the longhorn bdev device
 * returns:
 * none
 */
void
longhorn_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct longhorn_bdev_io *io_ctx = (struct longhorn_bdev_io *)bdev_io->driver_ctx;

	io_ctx->longhorn_bdev = bdev_io->bdev->ctxt;
	io_ctx->longhorn_ch = spdk_io_channel_get_ctx(ch);
	io_ctx->base_bdev_io_remaining = 0;
	io_ctx->base_bdev_io_submitted = 0;
	io_ctx->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;

	if (io_ctx->longhorn_ch->paused) {
		/* Channel is paused: park the I/O until playback resumes it. */
		longhorn_pause_queue_io_wait(io_ctx);
	} else {
		_longhorn_bdev_submit_request(io_ctx);
	}
}