This is more accurate to what they are, and will make defining library dependencies much simpler. This change in directory does not affect the final placement of naming of libraries at the end of time. Change-Id: Ic48a9233dff564e39ce357a9ea0a111ea2b6414b Signed-off-by: Seth Howell <seth.howell@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/465454 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
579 lines
14 KiB
C
579 lines
14 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright (c) Intel Corporation.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "bdev_uring.h"
|
|
|
|
#include "spdk/stdinc.h"
|
|
|
|
#include "spdk/barrier.h"
|
|
#include "spdk/bdev.h"
|
|
#include "spdk/conf.h"
|
|
#include "spdk/env.h"
|
|
#include "spdk/fd.h"
|
|
#include "spdk/likely.h"
|
|
#include "spdk/thread.h"
|
|
#include "spdk/json.h"
|
|
#include "spdk/util.h"
|
|
#include "spdk/string.h"
|
|
|
|
#include "spdk_internal/log.h"
|
|
|
|
#include <liburing.h>
|
|
|
|
struct bdev_uring_io_channel {
|
|
struct bdev_uring_group_channel *group_ch;
|
|
};
|
|
|
|
struct bdev_uring_group_channel {
|
|
uint64_t io_inflight;
|
|
uint64_t io_pending;
|
|
struct spdk_poller *poller;
|
|
struct io_uring uring;
|
|
};
|
|
|
|
struct bdev_uring_task {
|
|
uint64_t len;
|
|
struct bdev_uring_io_channel *ch;
|
|
TAILQ_ENTRY(bdev_uring_task) link;
|
|
};
|
|
|
|
struct bdev_uring {
|
|
struct spdk_bdev bdev;
|
|
char *filename;
|
|
int fd;
|
|
TAILQ_ENTRY(bdev_uring) link;
|
|
};
|
|
|
|
static int bdev_uring_init(void);
|
|
static void bdev_uring_fini(void);
|
|
static void uring_free_bdev(struct bdev_uring *uring);
|
|
static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head;
|
|
|
|
#define SPDK_URING_QUEUE_DEPTH 512
|
|
#define MAX_EVENTS_PER_POLL 32
|
|
|
|
static int
|
|
bdev_uring_get_ctx_size(void)
|
|
{
|
|
return sizeof(struct bdev_uring_task);
|
|
}
|
|
|
|
static struct spdk_bdev_module uring_if = {
|
|
.name = "uring",
|
|
.module_init = bdev_uring_init,
|
|
.module_fini = bdev_uring_fini,
|
|
.config_text = NULL,
|
|
.get_ctx_size = bdev_uring_get_ctx_size,
|
|
};
|
|
|
|
SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)
|
|
|
|
static int
|
|
bdev_uring_open(struct bdev_uring *bdev)
|
|
{
|
|
int fd;
|
|
|
|
fd = open(bdev->filename, O_NOATIME | O_DIRECT);
|
|
if (fd < 0) {
|
|
SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
|
|
bdev->filename, errno, spdk_strerror(errno));
|
|
bdev->fd = -1;
|
|
return -1;
|
|
}
|
|
|
|
bdev->fd = fd;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
bdev_uring_close(struct bdev_uring *bdev)
|
|
{
|
|
int rc;
|
|
|
|
if (bdev->fd == -1) {
|
|
return 0;
|
|
}
|
|
|
|
rc = close(bdev->fd);
|
|
if (rc < 0) {
|
|
SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
|
|
bdev->fd, errno, spdk_strerror(errno));
|
|
return -1;
|
|
}
|
|
|
|
bdev->fd = -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int64_t
|
|
bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
|
|
struct bdev_uring_task *uring_task,
|
|
struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
|
|
{
|
|
struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
|
|
struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
|
|
struct io_uring_sqe *sqe;
|
|
|
|
sqe = io_uring_get_sqe(&group_ch->uring);
|
|
io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
|
|
io_uring_sqe_set_data(sqe, uring_task);
|
|
uring_task->len = nbytes;
|
|
uring_task->ch = uring_ch;
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_URING, "read %d iovs size %lu to off: %#lx\n",
|
|
iovcnt, nbytes, offset);
|
|
|
|
group_ch->io_pending++;
|
|
return nbytes;
|
|
}
|
|
|
|
static int64_t
|
|
bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
|
|
struct bdev_uring_task *uring_task,
|
|
struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
|
|
{
|
|
struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
|
|
struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
|
|
struct io_uring_sqe *sqe;
|
|
|
|
sqe = io_uring_get_sqe(&group_ch->uring);
|
|
io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
|
|
io_uring_sqe_set_data(sqe, uring_task);
|
|
uring_task->ch = uring_ch;
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_URING, "write %d iovs size %lu from off: %#lx\n",
|
|
iovcnt, nbytes, offset);
|
|
|
|
group_ch->io_pending++;
|
|
return nbytes;
|
|
}
|
|
|
|
static int
|
|
bdev_uring_destruct(void *ctx)
|
|
{
|
|
struct bdev_uring *uring = ctx;
|
|
int rc = 0;
|
|
|
|
TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
|
|
rc = bdev_uring_close(uring);
|
|
if (rc < 0) {
|
|
SPDK_ERRLOG("bdev_uring_close() failed\n");
|
|
}
|
|
spdk_io_device_unregister(uring, NULL);
|
|
uring_free_bdev(uring);
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
bdev_uring_reap(struct io_uring *ring, int max)
|
|
{
|
|
int i, count, ret;
|
|
struct io_uring_cqe *cqe;
|
|
struct bdev_uring_task *uring_task;
|
|
enum spdk_bdev_io_status status;
|
|
|
|
count = 0;
|
|
for (i = 0; i < max; i++) {
|
|
ret = io_uring_peek_cqe(ring, &cqe);
|
|
if (ret != 0) {
|
|
return ret;
|
|
}
|
|
|
|
if (cqe == NULL) {
|
|
return count;
|
|
}
|
|
|
|
uring_task = (struct bdev_uring_task *)cqe->user_data;
|
|
if (cqe->res != (signed)uring_task->len) {
|
|
status = SPDK_BDEV_IO_STATUS_FAILED;
|
|
} else {
|
|
status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
|
}
|
|
|
|
uring_task->ch->group_ch->io_inflight--;
|
|
io_uring_cqe_seen(ring, cqe);
|
|
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
|
|
count++;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
static int
|
|
bdev_uring_group_poll(void *arg)
|
|
{
|
|
struct bdev_uring_group_channel *group_ch = arg;
|
|
int to_complete, to_submit;
|
|
int count, ret;
|
|
|
|
to_submit = group_ch->io_pending;
|
|
to_complete = group_ch->io_inflight;
|
|
|
|
ret = 0;
|
|
if (to_submit > 0) {
|
|
/* If there are I/O to submit, use io_uring_submit here.
|
|
* It will automatically call io_uring_enter appropriately. */
|
|
ret = io_uring_submit(&group_ch->uring);
|
|
group_ch->io_pending = 0;
|
|
group_ch->io_inflight += to_submit;
|
|
} else if (to_complete > 0) {
|
|
/* If there are I/O in flight but none to submit, we need to
|
|
* call io_uring_enter ourselves. */
|
|
ret = io_uring_enter(group_ch->uring.ring_fd, 0, 0,
|
|
IORING_ENTER_GETEVENTS, NULL);
|
|
}
|
|
|
|
if (ret < 0) {
|
|
return 1;
|
|
}
|
|
|
|
count = 0;
|
|
if (to_complete > 0) {
|
|
count = bdev_uring_reap(&group_ch->uring, to_complete);
|
|
}
|
|
|
|
return (count + to_submit);
|
|
}
|
|
|
|
static void bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
|
bool success)
|
|
{
|
|
if (!success) {
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
return;
|
|
}
|
|
|
|
switch (bdev_io->type) {
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
|
|
ch,
|
|
(struct bdev_uring_task *)bdev_io->driver_ctx,
|
|
bdev_io->u.bdev.iovs,
|
|
bdev_io->u.bdev.iovcnt,
|
|
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
|
|
bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
|
|
break;
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
|
|
ch,
|
|
(struct bdev_uring_task *)bdev_io->driver_ctx,
|
|
bdev_io->u.bdev.iovs,
|
|
bdev_io->u.bdev.iovcnt,
|
|
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
|
|
bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
|
|
break;
|
|
default:
|
|
SPDK_ERRLOG("Wrong io type\n");
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
|
|
{
|
|
switch (bdev_io->type) {
|
|
/* Read and write operations must be performed on buffers aligned to
|
|
* bdev->required_alignment. If user specified unaligned buffers,
|
|
* get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
|
|
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
|
|
return 0;
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static void bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
|
|
{
|
|
if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
|
|
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
|
}
|
|
}
|
|
|
|
static bool
|
|
bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
|
|
{
|
|
switch (io_type) {
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static int
|
|
bdev_uring_create_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_uring_io_channel *ch = ctx_buf;
|
|
|
|
ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_uring_io_channel *ch = ctx_buf;
|
|
|
|
spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
|
|
}
|
|
|
|
static struct spdk_io_channel *
|
|
bdev_uring_get_io_channel(void *ctx)
|
|
{
|
|
struct bdev_uring *uring = ctx;
|
|
|
|
return spdk_get_io_channel(uring);
|
|
}
|
|
|
|
|
|
static const struct spdk_bdev_fn_table uring_fn_table = {
|
|
.destruct = bdev_uring_destruct,
|
|
.submit_request = bdev_uring_submit_request,
|
|
.io_type_supported = bdev_uring_io_type_supported,
|
|
.get_io_channel = bdev_uring_get_io_channel,
|
|
};
|
|
|
|
static void uring_free_bdev(struct bdev_uring *uring)
|
|
{
|
|
if (uring == NULL) {
|
|
return;
|
|
}
|
|
free(uring->filename);
|
|
free(uring->bdev.name);
|
|
free(uring);
|
|
}
|
|
|
|
static int
|
|
bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_uring_group_channel *ch = ctx_buf;
|
|
|
|
if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, IORING_SETUP_IOPOLL) < 0) {
|
|
SPDK_ERRLOG("uring I/O context setup failure\n");
|
|
return -1;
|
|
}
|
|
|
|
ch->poller = spdk_poller_register(bdev_uring_group_poll, ch, 0);
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_uring_group_channel *ch = ctx_buf;
|
|
|
|
close(ch->uring.ring_fd);
|
|
io_uring_queue_exit(&ch->uring);
|
|
|
|
spdk_poller_unregister(&ch->poller);
|
|
}
|
|
|
|
struct spdk_bdev *
|
|
create_uring_bdev(const char *name, const char *filename)
|
|
{
|
|
struct bdev_uring *uring;
|
|
uint32_t block_size;
|
|
uint64_t bdev_size;
|
|
int rc;
|
|
|
|
uring = calloc(1, sizeof(*uring));
|
|
if (!uring) {
|
|
SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
|
|
return NULL;
|
|
}
|
|
|
|
uring->filename = strdup(filename);
|
|
if (!uring->filename) {
|
|
goto error_return;
|
|
}
|
|
|
|
if (bdev_uring_open(uring)) {
|
|
SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno);
|
|
goto error_return;
|
|
}
|
|
|
|
bdev_size = spdk_fd_get_size(uring->fd);
|
|
|
|
uring->bdev.name = strdup(name);
|
|
if (!uring->bdev.name) {
|
|
goto error_return;
|
|
}
|
|
uring->bdev.product_name = "URING bdev";
|
|
uring->bdev.module = &uring_if;
|
|
|
|
uring->bdev.write_cache = 1;
|
|
|
|
block_size = spdk_fd_get_blocklen(uring->fd);
|
|
if (block_size == 0) {
|
|
SPDK_ERRLOG("Block size could not be auto-detected\n");
|
|
goto error_return;
|
|
}
|
|
|
|
if (block_size < 512) {
|
|
SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
|
|
goto error_return;
|
|
}
|
|
|
|
if (!spdk_u32_is_pow2(block_size)) {
|
|
SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
|
|
goto error_return;
|
|
}
|
|
|
|
uring->bdev.blocklen = block_size;
|
|
uring->bdev.required_alignment = spdk_u32log2(block_size);
|
|
|
|
if (bdev_size % uring->bdev.blocklen != 0) {
|
|
SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
|
|
bdev_size, uring->bdev.blocklen);
|
|
goto error_return;
|
|
}
|
|
|
|
uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
|
|
uring->bdev.ctxt = uring;
|
|
|
|
uring->bdev.fn_table = &uring_fn_table;
|
|
|
|
spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
|
|
sizeof(struct bdev_uring_io_channel),
|
|
uring->bdev.name);
|
|
rc = spdk_bdev_register(&uring->bdev);
|
|
if (rc) {
|
|
spdk_io_device_unregister(uring, NULL);
|
|
goto error_return;
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
|
|
return &uring->bdev;
|
|
|
|
error_return:
|
|
bdev_uring_close(uring);
|
|
uring_free_bdev(uring);
|
|
return NULL;
|
|
}
|
|
|
|
struct delete_uring_bdev_ctx {
|
|
spdk_delete_uring_complete cb_fn;
|
|
void *cb_arg;
|
|
};
|
|
|
|
static void
|
|
uring_bdev_unregister_cb(void *arg, int bdeverrno)
|
|
{
|
|
struct delete_uring_bdev_ctx *ctx = arg;
|
|
|
|
ctx->cb_fn(ctx->cb_arg, bdeverrno);
|
|
free(ctx);
|
|
}
|
|
|
|
void
|
|
delete_uring_bdev(struct spdk_bdev *bdev, spdk_delete_uring_complete cb_fn, void *cb_arg)
|
|
{
|
|
struct delete_uring_bdev_ctx *ctx;
|
|
|
|
if (!bdev || bdev->module != &uring_if) {
|
|
cb_fn(cb_arg, -ENODEV);
|
|
return;
|
|
}
|
|
|
|
ctx = calloc(1, sizeof(*ctx));
|
|
if (ctx == NULL) {
|
|
cb_fn(cb_arg, -ENOMEM);
|
|
return;
|
|
}
|
|
|
|
ctx->cb_fn = cb_fn;
|
|
ctx->cb_arg = cb_arg;
|
|
spdk_bdev_unregister(bdev, uring_bdev_unregister_cb, ctx);
|
|
}
|
|
|
|
static int
|
|
bdev_uring_init(void)
|
|
{
|
|
size_t i;
|
|
struct spdk_conf_section *sp;
|
|
struct spdk_bdev *bdev;
|
|
|
|
TAILQ_INIT(&g_uring_bdev_head);
|
|
spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb,
|
|
sizeof(struct bdev_uring_group_channel),
|
|
"uring_module");
|
|
|
|
sp = spdk_conf_find_section(NULL, "URING");
|
|
if (!sp) {
|
|
return 0;
|
|
}
|
|
|
|
i = 0;
|
|
while (true) {
|
|
const char *file;
|
|
const char *name;
|
|
|
|
file = spdk_conf_section_get_nmval(sp, "URING", i, 0);
|
|
if (!file) {
|
|
break;
|
|
}
|
|
|
|
name = spdk_conf_section_get_nmval(sp, "URING", i, 1);
|
|
if (!name) {
|
|
SPDK_ERRLOG("No name provided for URING bdev with file %s\n", file);
|
|
i++;
|
|
continue;
|
|
}
|
|
|
|
bdev = create_uring_bdev(name, file);
|
|
if (!bdev) {
|
|
SPDK_ERRLOG("Unable to create URING bdev from file %s\n", file);
|
|
i++;
|
|
continue;
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bdev_uring_fini(void)
|
|
{
|
|
spdk_io_device_unregister(&uring_if, NULL);
|
|
}
|
|
|
|
SPDK_LOG_REGISTER_COMPONENT("uring", SPDK_LOG_URING)
|