/* SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

#include <sys/eventfd.h>
#include <libaio.h>

struct bdev_aio_io_channel {
	uint64_t			io_inflight;
	io_context_t			io_ctx;
	struct bdev_aio_group_channel	*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};

struct bdev_aio_group_channel {
	/* eventfd for io completion notification in interrupt mode.
	 * Negative value like '-1' indicates it is invalid or unused.
	 */
	int				efd;
	struct spdk_interrupt		*intr;
	struct spdk_poller		*poller;
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};

struct bdev_aio_task {
	struct iocb			iocb;
	uint64_t			len;
	struct bdev_aio_io_channel	*ch;
};

struct file_disk {
	struct bdev_aio_task	*reset_task;
	struct spdk_poller	*reset_retry_timer;
	struct spdk_bdev	disk;
	char			*filename;
	int			fd;
	TAILQ_ENTRY(file_disk)	link;
	bool			block_size_override;
	bool			readonly;
};

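/* Background: the structures above form a three-level hierarchy. One
 * bdev_aio_group_channel exists per SPDK thread polling this module; it owns
 * the shared poller/eventfd and a list of per-bdev bdev_aio_io_channel
 * objects, each wrapping one kernel io_context_t. A rough sketch of the
 * ownership for one open file "fdisk":
 *
 *   group_ch (per thread) --io_ch_head--> io_ch (per fdisk, per thread)
 *       io_ch->io_ctx   : kernel AIO context created by io_setup()
 *       io_ch->group_ch : back-pointer used for eventfd notification
 */
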
/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

#define SPDK_AIO_RING_VERSION	0xa10a10a1

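/* Background: struct spdk_aio_ring mirrors the layout the Linux kernel maps
 * into user space for an AIO context - an io_context_t is in fact a pointer
 * to this ring. When the version magic and feature fields match, completions
 * can be reaped by reading the ring directly (see bdev_user_io_getevents()
 * below) instead of paying for an io_getevents() syscall. The 0xa10a10a1
 * constant matches the kernel's AIO_RING_MAGIC.
 */
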
static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);

static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

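/* Background: SPDK_AIO_QUEUE_DEPTH bounds both the nr_events passed to
 * io_setup() in bdev_aio_create_cb() and the size of the on-stack io_event
 * array drained per poll in bdev_aio_io_channel_poll(), so a single poll can
 * never reap more completions than one context can have outstanding.
 * MAX_EVENTS_PER_POLL appears unused in this file.
 */
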
static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}

static struct spdk_bdev_module aio_if = {
	.name		= "aio",
	.module_init	= bdev_aio_initialize,
	.module_fini	= bdev_aio_fini,
	.get_ctx_size	= bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)

static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;
	int io_flag = disk->readonly ? O_RDONLY : O_RDWR;

	fd = open(disk->filename, io_flag | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, io_flag);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

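/* Background: O_DIRECT bypasses the page cache but requires buffer address,
 * length and file offset to be suitably aligned (typically to the logical
 * block size); the fallback open() without O_DIRECT keeps plain files on
 * filesystems that reject O_DIRECT usable. A hypothetical sketch of the
 * alignment direct I/O implies (names here are illustrative only):
 *
 *   void *buf = spdk_dma_malloc(4096, 4096, NULL);   // 4 KiB aligned buffer
 *   pread(disk->fd, buf, 4096, 0);                   // aligned len and offset
 */
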
static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

static void
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

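/* Background: the submit path above is the standard libaio sequence. A
 * minimal standalone sketch of the same flow (error handling elided; fd, iov,
 * iovcnt and offset assumed to exist) would be:
 *
 *   io_context_t ctx = 0;
 *   struct iocb iocb, *iocbs[1] = { &iocb };
 *   struct io_event ev;
 *   io_setup(128, &ctx);                              // like bdev_aio_create_cb()
 *   io_prep_preadv(&iocb, fd, iov, iovcnt, offset);   // fill the iocb
 *   io_submit(ctx, 1, iocbs);                         // queue it to the kernel
 *   io_getevents(ctx, 1, 1, &ev, NULL);               // reap the completion
 *   io_destroy(ctx);
 *
 * io_submit() returning -EAGAIN means the context's event slots are
 * exhausted; this is surfaced to the bdev layer as
 * SPDK_BDEV_IO_STATUS_NOMEM so the generic layer can queue and retry.
 */
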
static void
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	if (rc == 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
	}
}

static void
bdev_aio_destruct_cb(void *io_device)
{
	struct file_disk *fdisk = io_device;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}
	aio_free_disk(fdisk);
}

static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;

	spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);

	return 0;
}

static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}

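/* Worked example of the ring arithmetic above, assuming ring->size == 128,
 * head == 120 and tail == 8: the unsigned subtraction tail - head wraps, the
 * tail < head branch adds 128 and yields count == 16, and the copy is split
 * into kevents[120..127] followed by kevents[0..7].
 */
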
static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i, res = 0;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		aio_task->ch->io_inflight--;
		if (events[i].res == aio_task->len) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			/* From aio_abi.h, io_event.res is defined __s64, negative errno
			 * will be assigned to io_event.res for error situation.
			 * But from libaio.h, io_event.res is defined unsigned long, so
			 * convert it to signed value for error detection.
			 */
			SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
			res = (int)events[i].res;
			if (res < 0) {
				spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
			} else {
				spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			}
		}
	}

	return nr;
}

static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_aio_group_interrupt(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	int rc;
	uint64_t num_events;

	assert(group_ch->efd >= 0);

	/* If the number of completed I/O is larger than SPDK_AIO_QUEUE_DEPTH,
	 * io_getevents should be called again to ensure all completed I/O are processed.
	 */
	rc = read(group_ch->efd, &num_events, sizeof(num_events));
	if (rc < 0) {
		SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
		return -errno;
	}

	if (num_events > SPDK_AIO_QUEUE_DEPTH) {
		num_events -= SPDK_AIO_QUEUE_DEPTH;
		rc = write(group_ch->efd, &num_events, sizeof(num_events));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
		}
	}

	return bdev_aio_group_poll(group_ch);
}

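/* Background: this relies on standard eventfd counter semantics. The kernel
 * adds 1 to the counter per completion routed via io_set_eventfd(); a read()
 * returns the accumulated count and resets it to zero. Writing the remainder
 * back above re-arms the interrupt, so a burst larger than
 * SPDK_AIO_QUEUE_DEPTH triggers another callback rather than leaving events
 * stranded in the ring.
 */
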
static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}

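/* Background: a reset therefore does not abort outstanding I/O. It iterates
 * every channel via spdk_for_each_channel(); if any channel still reports
 * io_inflight != 0 the iteration finishes with status -1 and a 500 us timer
 * retries, so the reset completes only once all previously submitted kernel
 * AIO has drained.
 */
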
static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

static int
_bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt;

	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (fdisk->readonly) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
					     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		}
		return 0;

	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}

static void
bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}

static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_named_bool(w, "block_size_override", fdisk->block_size_override);

	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct		= bdev_aio_destruct,
	.submit_request		= bdev_aio_submit_request,
	.io_type_supported	= bdev_aio_io_type_supported,
	.get_io_channel		= bdev_aio_get_io_channel,
	.dump_info_json		= bdev_aio_dump_info_json,
	.write_config_json	= bdev_aio_write_json_config,
};

static void
aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}

static int
bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
{
	int efd;

	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (efd < 0) {
		return -1;
	}

	ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
	if (ch->intr == NULL) {
		close(efd);
		return -1;
	}
	ch->efd = efd;

	return 0;
}

static void
bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
{
	spdk_interrupt_unregister(&ch->intr);
	close(ch->efd);
	ch->efd = -1;
}

static void
bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
{
	return;
}

static int
bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;
	int rc;

	TAILQ_INIT(&ch->io_ch_head);
	/* Initialize ch->efd to be invalid and unused. */
	ch->efd = -1;
	if (spdk_interrupt_mode_is_enabled()) {
		rc = bdev_aio_register_interrupt(ch);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n");
			return rc;
		}
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL);

	return 0;
}

static void
bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
	}

	spdk_poller_unregister(&ch->poller);
	if (spdk_interrupt_mode_is_enabled()) {
		bdev_aio_unregister_interrupt(ch);
	}
}

int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}
	fdisk->readonly = readonly;

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
		rc = -ENOMEM;
		goto error_return;
	}

	if (bdev_aio_open(fdisk)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
		rc = -errno;
		goto error_return;
	}

	disk_size = spdk_fd_get_size(fdisk->fd);

	fdisk->disk.name = strdup(name);
	if (!fdisk->disk.name) {
		rc = -ENOMEM;
		goto error_return;
	}
	fdisk->disk.product_name = "AIO disk";
	fdisk->disk.module = &aio_if;

	fdisk->disk.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			rc = -EINVAL;
			goto error_return;
		}
		fdisk->block_size_override = false;
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			rc = -EINVAL;
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
		fdisk->block_size_override = true;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blocklen = block_size;
	if (fdisk->block_size_override && detected_block_size) {
		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
	} else {
		fdisk->disk.required_alignment = spdk_u32log2(block_size);
	}

	if (disk_size % fdisk->disk.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    disk_size, fdisk->disk.blocklen);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
	fdisk->disk.ctxt = fdisk;

	fdisk->disk.fn_table = &aio_fn_table;

	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
				sizeof(struct bdev_aio_io_channel),
				fdisk->disk.name);
	rc = spdk_bdev_register(&fdisk->disk);
	if (rc) {
		spdk_io_device_unregister(fdisk, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
	return 0;

error_return:
	bdev_aio_close(fdisk);
	aio_free_disk(fdisk);
	return rc;
}

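/* Background: create_aio_bdev() is typically reached through the
 * bdev_aio_create RPC, whose persisted form bdev_aio_write_json_config()
 * emits above. A hypothetical invocation for illustration (the name and
 * filename values are made up):
 *
 *   {
 *     "method": "bdev_aio_create",
 *     "params": { "name": "aio0", "filename": "/dev/sdb",
 *                 "block_size": 512, "readonly": false }
 *   }
 */
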
static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

int
bdev_aio_rescan(const char *name)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct file_disk *fdisk;
	uint64_t disk_size, blockcnt;
	int rc;

	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);
	if (bdev->module != &aio_if) {
		rc = -ENODEV;
		goto exit;
	}

	fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk);
	disk_size = spdk_fd_get_size(fdisk->fd);
	blockcnt = disk_size / bdev->blocklen;

	if (bdev->blockcnt != blockcnt) {
		SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64 ", new block count %"
			       PRIu64 "\n",
			       fdisk->filename,
			       bdev->blockcnt,
			       blockcnt);
		rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
		if (rc != 0) {
			SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n",
				    fdisk->filename, rc);
			goto exit;
		}
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}

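/* Background: rescan re-reads the backing file size and, when the block count
 * changed, propagates it with spdk_bdev_notify_blockcnt_change() so upper
 * layers observe the resize. For example, growing a 1 GiB backing file to
 * 2 GiB with a 512-byte block size moves blockcnt from 2097152 to 4194304.
 */
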
struct delete_aio_bdev_ctx {
	delete_aio_bdev_complete	cb_fn;
	void				*cb_arg;
};

static void
aio_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_aio_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg)
{
	struct delete_aio_bdev_ctx *ctx;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx);
	if (rc != 0) {
		aio_bdev_unregister_cb(ctx, rc);
	}
}

static int
bdev_aio_initialize(void)
{
	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
				sizeof(struct bdev_aio_group_channel), "aio_module");

	return 0;
}

static void
bdev_aio_fini(void)
{
	spdk_io_device_unregister(&aio_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(aio)