nbd: improve nbd to handle overlapped I/O

The previous nbd implementation processed only one I/O at a time.
This patch increases the spdk_nbd_disk I/O queue depth.
Lists are added to accommodate and coordinate overlapped nbd_io.

Change-Id: I3bda2c957a561b884ebfe9550b7cb6f3c991aef8
Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/392609
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Xiaodong Liu 2017-12-20 03:50:43 -05:00 committed by Jim Harris
parent 90ef3021bc
commit bd6de97a0e

View File

@ -46,14 +46,15 @@
#include "spdk/io_channel.h" #include "spdk/io_channel.h"
#include "spdk_internal/log.h" #include "spdk_internal/log.h"
#include "spdk/queue.h"
#define GET_IO_LOOP_COUNT 16
enum nbd_io_state_t { enum nbd_io_state_t {
/* Receiving or ready to receive nbd request header */ /* Receiving or ready to receive nbd request header */
NBD_IO_RECV_REQ = 0, NBD_IO_RECV_REQ = 0,
/* Receiving write payload */ /* Receiving write payload */
NBD_IO_RECV_PAYLOAD, NBD_IO_RECV_PAYLOAD,
/* processing or ready to process of SPDK bdev module */
NBD_IO_PROCESS,
/* Transmitting or ready to transmit nbd response header */ /* Transmitting or ready to transmit nbd response header */
NBD_IO_XMIT_RESP, NBD_IO_XMIT_RESP,
/* Transmitting read payload */ /* Transmitting read payload */
@ -64,7 +65,6 @@ struct nbd_io {
struct spdk_nbd_disk *nbd; struct spdk_nbd_disk *nbd;
enum nbd_io_state_t state; enum nbd_io_state_t state;
int ref;
void *payload; void *payload;
/* NOTE: for TRIM, this represents number of bytes to trim. */ /* NOTE: for TRIM, this represents number of bytes to trim. */
@ -78,6 +78,16 @@ struct nbd_io {
* response, or payload from the nbd socket. * response, or payload from the nbd socket.
*/ */
uint32_t offset; uint32_t offset;
TAILQ_ENTRY(nbd_io) tailq;
};
enum nbd_disk_state_t {
NBD_DISK_STATE_RUNNING = 0,
/* soft disconnection caused by receiving nbd_cmd_disc */
NBD_DISK_STATE_SOFTDISC,
/* hard disconnection caused by mandatory conditions */
NBD_DISK_STATE_HARDDISC,
}; };
struct spdk_nbd_disk { struct spdk_nbd_disk {
@ -88,10 +98,17 @@ struct spdk_nbd_disk {
char *nbd_path; char *nbd_path;
int kernel_sp_fd; int kernel_sp_fd;
int spdk_sp_fd; int spdk_sp_fd;
struct nbd_io io;
struct spdk_poller *nbd_poller; struct spdk_poller *nbd_poller;
uint32_t buf_align; uint32_t buf_align;
struct nbd_io *io_in_recv;
TAILQ_HEAD(, nbd_io) received_io_list;
TAILQ_HEAD(, nbd_io) executed_io_list;
enum nbd_disk_state_t state;
/* count of nbd_io in spdk_nbd_disk */
int io_count;
TAILQ_ENTRY(spdk_nbd_disk) tailq; TAILQ_ENTRY(spdk_nbd_disk) tailq;
}; };
@ -205,6 +222,99 @@ nbd_disconnect(struct spdk_nbd_disk *nbd)
ioctl(nbd->dev_fd, NBD_DISCONNECT); ioctl(nbd->dev_fd, NBD_DISCONNECT);
} }
/*
 * Allocate a new nbd_io context tracked by the given disk.
 *
 * The reply magic is pre-filled so the response header is ready to
 * transmit once the request completes.  Allocation failure is treated
 * as fatal: the poller cannot make progress without an io context.
 */
static struct nbd_io *
spdk_get_nbd_io(struct spdk_nbd_disk *nbd)
{
	struct nbd_io *io = calloc(1, sizeof(*io));

	if (io == NULL) {
		SPDK_ERRLOG("Unable to get nbd_io\n");
		abort();
	}

	io->nbd = nbd;
	to_be32(&io->resp.magic, NBD_REPLY_MAGIC);
	nbd->io_count++;

	return io;
}
/*
 * Release an nbd_io and its payload buffer, and drop the disk's
 * outstanding io count.
 */
static void
spdk_put_nbd_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
{
	nbd->io_count--;

	if (io->payload != NULL) {
		spdk_dma_free(io->payload);
	}

	free(io);
}
/*
 * Check whether every received nbd_io has been transmitted.
 *
 * \return 1 if some received nbd_io has not been transmitted yet.
 *         0 if all received nbd_io have been transmitted.
 */
static int
spdk_nbd_io_xmit_check(struct spdk_nbd_disk *nbd)
{
	int outstanding = nbd->io_count;

	/*
	 * An io still sitting in the receive slot has not been fully
	 * received, so it does not count against transmission.
	 */
	if (nbd->io_in_recv != NULL) {
		outstanding--;
	}

	return outstanding > 0 ? 1 : 0;
}
/*
 * Free every nbd_io that is not currently executing inside the bdev
 * layer: the io being received (if any), plus everything queued on the
 * received and executed lists.  Freed io are discarded, not transmitted.
 *
 * \return 1 if some nbd_io is still executing in the bdev layer; its
 *           completion callback is expected to finish the teardown.
 *         0 if every nbd_io obtained has been freed.
 */
static int
spdk_nbd_cleanup_io(struct spdk_nbd_disk *nbd)
{
	struct nbd_io *io, *io_tmp;

	/* free the partially-received io, if any */
	if (nbd->io_in_recv != NULL) {
		spdk_put_nbd_io(nbd, nbd->io_in_recv);
		nbd->io_in_recv = NULL;
	}

	/*
	 * TAILQ_FOREACH_SAFE is a no-op on an empty list, so no
	 * TAILQ_EMPTY pre-check is needed before either drain loop.
	 */

	/* free io in received_io_list */
	TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) {
		TAILQ_REMOVE(&nbd->received_io_list, io, tailq);
		spdk_put_nbd_io(nbd, io);
	}

	/* free io in executed_io_list */
	TAILQ_FOREACH_SAFE(io, &nbd->executed_io_list, tailq, io_tmp) {
		TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
		spdk_put_nbd_io(nbd, io);
	}

	/*
	 * Some nbd_io may still be executing in the bdev module.
	 * Wait for their completion callbacks before final teardown.
	 */
	if (nbd->io_count != 0) {
		return 1;
	}

	return 0;
}
static void static void
_nbd_stop(struct spdk_nbd_disk *nbd) _nbd_stop(struct spdk_nbd_disk *nbd)
{ {
@ -250,8 +360,12 @@ spdk_nbd_stop(struct spdk_nbd_disk *nbd)
return; return;
} }
nbd->io.ref--; nbd->state = NBD_DISK_STATE_HARDDISC;
if (nbd->io.ref == 0) {
/*
* Stop action should be called only after all nbd_io are executed.
*/
if (!spdk_nbd_cleanup_io(nbd)) {
_nbd_stop(nbd); _nbd_stop(nbd);
} }
} }
@ -305,14 +419,13 @@ nbd_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
} }
memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle)); memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle));
io->state = NBD_IO_XMIT_RESP; TAILQ_INSERT_TAIL(&nbd->executed_io_list, io, tailq);
if (bdev_io != NULL) { if (bdev_io != NULL) {
spdk_bdev_free_io(bdev_io); spdk_bdev_free_io(bdev_io);
} }
io->ref--; if (nbd->state == NBD_DISK_STATE_HARDDISC && !spdk_nbd_cleanup_io(nbd)) {
if (io->ref == 0) {
_nbd_stop(nbd); _nbd_stop(nbd);
} }
} }
@ -324,8 +437,6 @@ nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
struct spdk_io_channel *ch = nbd->ch; struct spdk_io_channel *ch = nbd->ch;
int rc = 0; int rc = 0;
io->ref++;
switch (from_be32(&io->req.type)) { switch (from_be32(&io->req.type)) {
case NBD_CMD_READ: case NBD_CMD_READ:
rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from), rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from),
@ -349,8 +460,9 @@ nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
break; break;
#endif #endif
case NBD_CMD_DISC: case NBD_CMD_DISC:
io->ref--; spdk_put_nbd_io(nbd, io);
return -ECONNRESET; nbd->state = NBD_DISK_STATE_SOFTDISC;
break;
default: default:
rc = -1; rc = -1;
} }
@ -363,28 +475,50 @@ nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
} }
static int static int
spdk_nbd_io_exec(struct nbd_io *io) spdk_nbd_io_exec(struct spdk_nbd_disk *nbd)
{ {
struct spdk_nbd_disk *nbd = io->nbd; struct nbd_io *io, *io_tmp;
int ret = 0; int ret = 0;
if (io->state == NBD_IO_PROCESS) { /*
ret = nbd_submit_bdev_io(nbd, io); * For soft disconnection, nbd server must handle all outstanding
* request before closing connection.
*/
if (nbd->state == NBD_DISK_STATE_HARDDISC) {
return 0;
}
if (!TAILQ_EMPTY(&nbd->received_io_list)) {
TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) {
TAILQ_REMOVE(&nbd->received_io_list, io, tailq);
ret = nbd_submit_bdev_io(nbd, io);
if (ret < 0) {
break;
}
}
} }
return ret; return ret;
} }
static int static int
spdk_nbd_io_recv(struct nbd_io *io) spdk_nbd_io_recv_internal(struct spdk_nbd_disk *nbd)
{ {
struct spdk_nbd_disk *nbd = io->nbd; struct nbd_io *io;
int ret = 0; int ret = 0;
if (nbd->io_in_recv == NULL) {
nbd->io_in_recv = spdk_get_nbd_io(nbd);
}
io = nbd->io_in_recv;
if (io->state == NBD_IO_RECV_REQ) { if (io->state == NBD_IO_RECV_REQ) {
ret = read_from_socket(nbd->spdk_sp_fd, (char *)&io->req + io->offset, ret = read_from_socket(nbd->spdk_sp_fd, (char *)&io->req + io->offset,
sizeof(io->req) - io->offset); sizeof(io->req) - io->offset);
if (ret <= 0) { if (ret < 0) {
spdk_put_nbd_io(nbd, io);
nbd->io_in_recv = NULL;
return ret; return ret;
} }
@ -397,6 +531,8 @@ spdk_nbd_io_recv(struct nbd_io *io)
/* req magic check */ /* req magic check */
if (from_be32(&io->req.magic) != NBD_REQUEST_MAGIC) { if (from_be32(&io->req.magic) != NBD_REQUEST_MAGIC) {
SPDK_ERRLOG("invalid request magic\n"); SPDK_ERRLOG("invalid request magic\n");
spdk_put_nbd_io(nbd, io);
nbd->io_in_recv = NULL;
return -EINVAL; return -EINVAL;
} }
@ -413,6 +549,8 @@ spdk_nbd_io_recv(struct nbd_io *io)
io->payload = spdk_dma_malloc(io->payload_size, nbd->buf_align, NULL); io->payload = spdk_dma_malloc(io->payload_size, nbd->buf_align, NULL);
if (io->payload == NULL) { if (io->payload == NULL) {
SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size); SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size);
spdk_put_nbd_io(nbd, io);
nbd->io_in_recv = NULL;
return -ENOMEM; return -ENOMEM;
} }
} else { } else {
@ -423,15 +561,18 @@ spdk_nbd_io_recv(struct nbd_io *io)
if (from_be32(&io->req.type) == NBD_CMD_WRITE) { if (from_be32(&io->req.type) == NBD_CMD_WRITE) {
io->state = NBD_IO_RECV_PAYLOAD; io->state = NBD_IO_RECV_PAYLOAD;
} else { } else {
io->state = NBD_IO_PROCESS; io->state = NBD_IO_XMIT_RESP;
ret = spdk_nbd_io_exec(io); nbd->io_in_recv = NULL;
TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq);
} }
} }
} }
if (io->state == NBD_IO_RECV_PAYLOAD) { if (io->state == NBD_IO_RECV_PAYLOAD) {
ret = read_from_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset); ret = read_from_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset);
if (ret <= 0) { if (ret < 0) {
spdk_put_nbd_io(nbd, io);
nbd->io_in_recv = NULL;
return ret; return ret;
} }
@ -440,21 +581,50 @@ spdk_nbd_io_recv(struct nbd_io *io)
/* request payload is fully received */ /* request payload is fully received */
if (io->offset == io->payload_size) { if (io->offset == io->payload_size) {
io->offset = 0; io->offset = 0;
io->state = NBD_IO_PROCESS; io->state = NBD_IO_XMIT_RESP;
ret = spdk_nbd_io_exec(io); nbd->io_in_recv = NULL;
TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq);
} }
} }
return ret; return 0;
} }
static int static int
spdk_nbd_io_xmit(struct nbd_io *io) spdk_nbd_io_recv(struct spdk_nbd_disk *nbd)
{ {
struct spdk_nbd_disk *nbd = io->nbd; int i, ret = 0;
/*
* nbd server should not accept request in both soft and hard
* disconnect states.
*/
if (nbd->state != NBD_DISK_STATE_RUNNING) {
return 0;
}
for (i = 0; i < GET_IO_LOOP_COUNT; i++) {
ret = spdk_nbd_io_recv_internal(nbd);
if (ret != 0) {
return ret;
}
}
return 0;
}
static int
spdk_nbd_io_xmit_internal(struct spdk_nbd_disk *nbd)
{
struct nbd_io *io;
int ret = 0; int ret = 0;
io = TAILQ_FIRST(&nbd->executed_io_list);
if (io == NULL) {
return 0;
}
/* resp error and handler are already set in io_done */ /* resp error and handler are already set in io_done */
if (io->state == NBD_IO_XMIT_RESP) { if (io->state == NBD_IO_XMIT_RESP) {
@ -472,11 +642,9 @@ spdk_nbd_io_xmit(struct nbd_io *io)
/* transmit payload only when NBD_CMD_READ with no resp error */ /* transmit payload only when NBD_CMD_READ with no resp error */
if (from_be32(&io->req.type) != NBD_CMD_READ || io->resp.error != 0) { if (from_be32(&io->req.type) != NBD_CMD_READ || io->resp.error != 0) {
io->state = NBD_IO_RECV_REQ; TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
if (io->payload) { spdk_put_nbd_io(nbd, io);
spdk_dma_free(io->payload); return 0;
io->payload = NULL;
}
} else { } else {
io->state = NBD_IO_XMIT_PAYLOAD; io->state = NBD_IO_XMIT_PAYLOAD;
} }
@ -493,18 +661,45 @@ spdk_nbd_io_xmit(struct nbd_io *io)
/* read payload is fully transmitted */ /* read payload is fully transmitted */
if (io->offset == io->payload_size) { if (io->offset == io->payload_size) {
io->offset = 0; TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
io->state = NBD_IO_RECV_REQ; spdk_put_nbd_io(nbd, io);
if (io->payload) {
spdk_dma_free(io->payload);
io->payload = NULL;
}
} }
} }
return 0; return 0;
} }
/*
 * Transmit executed nbd_io back to the nbd socket.
 *
 * \return 0 on success, a negative value on socket failure, or -1 when
 *         a soft disconnect has completed and the connection must close.
 */
static int
spdk_nbd_io_xmit(struct spdk_nbd_disk *nbd)
{
	int rc;

	/*
	 * On hard disconnect the connection is torn down without
	 * answering the client, so nothing is transmitted.
	 */
	if (nbd->state == NBD_DISK_STATE_HARDDISC) {
		return 0;
	}

	/* Drain the executed list one response at a time. */
	while (!TAILQ_EMPTY(&nbd->executed_io_list)) {
		rc = spdk_nbd_io_xmit_internal(nbd);
		if (rc != 0) {
			return rc;
		}
	}

	/*
	 * A soft disconnect may close the connection only once every
	 * outstanding request has been answered.
	 */
	if (nbd->state == NBD_DISK_STATE_SOFTDISC &&
	    spdk_nbd_io_xmit_check(nbd) == 0) {
		return -1;
	}

	return 0;
}
/** /**
* Poll an NBD instance. * Poll an NBD instance.
* *
@ -513,15 +708,20 @@ spdk_nbd_io_xmit(struct nbd_io *io)
static int static int
_spdk_nbd_poll(struct spdk_nbd_disk *nbd) _spdk_nbd_poll(struct spdk_nbd_disk *nbd)
{ {
struct nbd_io *io = &nbd->io;
int rc; int rc;
rc = spdk_nbd_io_recv(io); /* transmit executed io first */
rc = spdk_nbd_io_xmit(nbd);
if (rc < 0) { if (rc < 0) {
return rc; return rc;
} }
rc = spdk_nbd_io_xmit(io); rc = spdk_nbd_io_recv(nbd);
if (rc < 0) {
return rc;
}
rc = spdk_nbd_io_exec(nbd);
return rc; return rc;
} }
@ -582,7 +782,6 @@ spdk_nbd_start(const char *bdev_name, const char *nbd_path)
return NULL; return NULL;
} }
nbd->io.nbd = nbd;
nbd->dev_fd = -1; nbd->dev_fd = -1;
nbd->spdk_sp_fd = -1; nbd->spdk_sp_fd = -1;
nbd->kernel_sp_fd = -1; nbd->kernel_sp_fd = -1;
@ -593,7 +792,6 @@ spdk_nbd_start(const char *bdev_name, const char *nbd_path)
goto err; goto err;
} }
nbd->io.ref = 1;
nbd->bdev = bdev; nbd->bdev = bdev;
nbd->ch = spdk_bdev_get_io_channel(nbd->bdev_desc); nbd->ch = spdk_bdev_get_io_channel(nbd->bdev_desc);
@ -612,6 +810,10 @@ spdk_nbd_start(const char *bdev_name, const char *nbd_path)
SPDK_ERRLOG("strdup allocation failure\n"); SPDK_ERRLOG("strdup allocation failure\n");
goto err; goto err;
} }
TAILQ_INIT(&nbd->received_io_list);
TAILQ_INIT(&nbd->executed_io_list);
/* Add nbd_disk to the end of disk list */ /* Add nbd_disk to the end of disk list */
rc = spdk_nbd_disk_register(nbd); rc = spdk_nbd_disk_register(nbd);
if (rc != 0) { if (rc != 0) {
@ -678,8 +880,6 @@ spdk_nbd_start(const char *bdev_name, const char *nbd_path)
goto err; goto err;
} }
to_be32(&nbd->io.resp.magic, NBD_REPLY_MAGIC);
nbd->nbd_poller = spdk_poller_register(spdk_nbd_poll, nbd, 0); nbd->nbd_poller = spdk_poller_register(spdk_nbd_poll, nbd, 0);
return nbd; return nbd;