bdev/aio: Add the epoll support

This patch is used to add the epoll suport and make
the group polling more efficient.

Change-Id: I93416514a53dc45471e375b39aa051e6500de412
Signed-off-by: Ziye Yang <optimistyzy@gmail.com>
Reviewed-on: https://review.gerrithub.io/421432
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: GangCao <gang.cao@intel.com>
This commit is contained in:
Ziye Yang 2018-08-10 15:59:38 +08:00 committed by Changpeng Liu
parent 0c0d200b7c
commit 6ff8f60ebb
2 changed files with 46 additions and 7 deletions

View File

@ -53,6 +53,7 @@ static void bdev_aio_get_spdk_running_config(FILE *fp);
static TAILQ_HEAD(, file_disk) g_aio_disk_head; static TAILQ_HEAD(, file_disk) g_aio_disk_head;
#define SPDK_AIO_QUEUE_DEPTH 128 #define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32
static int static int
bdev_aio_get_ctx_size(void) bdev_aio_get_ctx_size(void)
@ -70,7 +71,7 @@ static struct spdk_bdev_module aio_if = {
struct bdev_aio_group_channel { struct bdev_aio_group_channel {
struct spdk_poller *poller; struct spdk_poller *poller;
TAILQ_HEAD(, bdev_aio_io_channel) aio_channel_head; int epfd;
}; };
SPDK_BDEV_MODULE_REGISTER(&aio_if) SPDK_BDEV_MODULE_REGISTER(&aio_if)
@ -130,6 +131,7 @@ bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset); io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
iocb->data = aio_task; iocb->data = aio_task;
aio_task->len = nbytes; aio_task->len = nbytes;
io_set_eventfd(iocb, aio_ch->efd);
SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n", SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
iovcnt, nbytes, offset); iovcnt, nbytes, offset);
@ -160,6 +162,7 @@ bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset); io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
iocb->data = aio_task; iocb->data = aio_task;
aio_task->len = len; aio_task->len = len;
io_set_eventfd(iocb, aio_ch->efd);
SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n", SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
iovcnt, len, offset); iovcnt, len, offset);
@ -204,7 +207,14 @@ bdev_aio_destruct(void *ctx)
static int static int
bdev_aio_initialize_io_channel(struct bdev_aio_io_channel *ch) bdev_aio_initialize_io_channel(struct bdev_aio_io_channel *ch)
{ {
ch->efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
if (ch->efd == -1) {
SPDK_ERRLOG("Cannot create efd\n");
return -1;
}
if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
close(ch->efd);
SPDK_ERRLOG("async I/O context setup failure\n"); SPDK_ERRLOG("async I/O context setup failure\n");
return -1; return -1;
} }
@ -217,15 +227,23 @@ bdev_aio_group_poll(void *arg)
{ {
struct bdev_aio_group_channel *group_ch = arg; struct bdev_aio_group_channel *group_ch = arg;
struct bdev_aio_io_channel *ch; struct bdev_aio_io_channel *ch;
int nr, i, total_nr = 0; int nr, i, j, rc, total_nr = 0;
enum spdk_bdev_io_status status; enum spdk_bdev_io_status status;
struct bdev_aio_task *aio_task; struct bdev_aio_task *aio_task;
struct timespec timeout; struct timespec timeout;
struct io_event events[SPDK_AIO_QUEUE_DEPTH]; struct io_event events[SPDK_AIO_QUEUE_DEPTH];
struct epoll_event epevents[MAX_EVENTS_PER_POLL];
timeout.tv_sec = 0; timeout.tv_sec = 0;
timeout.tv_nsec = 0; timeout.tv_nsec = 0;
TAILQ_FOREACH(ch, &group_ch->aio_channel_head, link) { rc = epoll_wait(group_ch->epfd, epevents, MAX_EVENTS_PER_POLL, 0);
if (rc == -1) {
SPDK_ERRLOG("epoll_wait error(%d): %s on ch=%p\n", errno, spdk_strerror(errno), group_ch);
return -1;
}
for (j = 0; j < rc; j++) {
ch = epevents[j].data.ptr;
nr = io_getevents(ch->io_ctx, 1, SPDK_AIO_QUEUE_DEPTH, nr = io_getevents(ch->io_ctx, 1, SPDK_AIO_QUEUE_DEPTH,
events, &timeout); events, &timeout);
@ -374,6 +392,7 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)
{ {
struct bdev_aio_io_channel *ch = ctx_buf; struct bdev_aio_io_channel *ch = ctx_buf;
struct bdev_aio_group_channel *group_ch_ctx; struct bdev_aio_group_channel *group_ch_ctx;
struct epoll_event epevent;
if (bdev_aio_initialize_io_channel(ch) != 0) { if (bdev_aio_initialize_io_channel(ch) != 0) {
return -1; return -1;
@ -381,7 +400,16 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)
ch->group_ch = spdk_get_io_channel(&aio_if); ch->group_ch = spdk_get_io_channel(&aio_if);
group_ch_ctx = spdk_io_channel_get_ctx(ch->group_ch); group_ch_ctx = spdk_io_channel_get_ctx(ch->group_ch);
TAILQ_INSERT_TAIL(&group_ch_ctx->aio_channel_head, ch, link);
epevent.events = EPOLLIN | EPOLLET;
epevent.data.ptr = ch;
if (epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_ADD, ch->efd, &epevent)) {
close(ch->efd);
io_destroy(ch->io_ctx);
spdk_put_io_channel(ch->group_ch);
SPDK_ERRLOG("epoll_ctl error\n");
return -1;
}
return 0; return 0;
} }
@ -390,10 +418,12 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{ {
struct bdev_aio_io_channel *io_channel = ctx_buf; struct bdev_aio_io_channel *io_channel = ctx_buf;
struct bdev_aio_group_channel *group_ch_ctx; struct bdev_aio_group_channel *group_ch_ctx;
struct epoll_event event;
group_ch_ctx = spdk_io_channel_get_ctx(io_channel->group_ch); group_ch_ctx = spdk_io_channel_get_ctx(io_channel->group_ch);
TAILQ_REMOVE(&group_ch_ctx->aio_channel_head, io_channel, link); epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_DEL, io_channel->efd, &event);
spdk_put_io_channel(io_channel->group_ch); spdk_put_io_channel(io_channel->group_ch);
close(io_channel->efd);
io_destroy(io_channel->io_ctx); io_destroy(io_channel->io_ctx);
} }
@ -467,7 +497,12 @@ bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{ {
struct bdev_aio_group_channel *ch = ctx_buf; struct bdev_aio_group_channel *ch = ctx_buf;
TAILQ_INIT(&ch->aio_channel_head); ch->epfd = epoll_create1(0);
if (ch->epfd == -1) {
SPDK_ERRLOG("cannot create epoll fd\n");
return -1;
}
ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0); ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0);
return 0; return 0;
} }
@ -477,6 +512,7 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{ {
struct bdev_aio_group_channel *ch = ctx_buf; struct bdev_aio_group_channel *ch = ctx_buf;
close(ch->epfd);
spdk_poller_unregister(&ch->poller); spdk_poller_unregister(&ch->poller);
} }

View File

@ -37,6 +37,8 @@
#include "spdk/stdinc.h" #include "spdk/stdinc.h"
#include <libaio.h> #include <libaio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include "spdk/queue.h" #include "spdk/queue.h"
#include "spdk/bdev.h" #include "spdk/bdev.h"
@ -54,6 +56,7 @@ struct bdev_aio_io_channel {
uint64_t io_inflight; uint64_t io_inflight;
struct spdk_io_channel *group_ch; struct spdk_io_channel *group_ch;
TAILQ_ENTRY(bdev_aio_io_channel) link; TAILQ_ENTRY(bdev_aio_io_channel) link;
int efd;
}; };
typedef void (*spdk_delete_aio_complete)(void *cb_arg, int bdeverrno); typedef void (*spdk_delete_aio_complete)(void *cb_arg, int bdeverrno);