This will be used to track time used in pollers - each poller can now indicate if it found any work to do or not. For cases where it was obvious and the infrastructure was already in place, existing pollers have been modified to return 0 or a positive value to indicate whether work was done. Other pollers have been modified to return -1 by default, indicating that the poller isn't indicating anything about whether work was performed. This will allow us to find un-annotated pollers easily in the future and fix them incrementally. Change-Id: Ifebfa56604a38434fac5c76ba7263267574ff199 Signed-off-by: Roman Sudarikov <roman.sudarikov@intel.com> Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com> Reviewed-on: https://review.gerrithub.io/391042 Tested-by: SPDK Automated Test System <sys_sgsw@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
896 lines
19 KiB
C
896 lines
19 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright (c) Intel Corporation.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "spdk/stdinc.h"
|
|
#include "spdk/string.h"
|
|
|
|
#include <linux/nbd.h>
|
|
|
|
#include "spdk/nbd.h"
|
|
#include "nbd_internal.h"
|
|
#include "spdk/bdev.h"
|
|
#include "spdk/endian.h"
|
|
#include "spdk/env.h"
|
|
#include "spdk/log.h"
|
|
#include "spdk/util.h"
|
|
#include "spdk/io_channel.h"
|
|
|
|
#include "spdk_internal/log.h"
|
|
#include "spdk/queue.h"
|
|
|
|
#define GET_IO_LOOP_COUNT 16
|
|
|
|
enum nbd_io_state_t {
|
|
/* Receiving or ready to receive nbd request header */
|
|
NBD_IO_RECV_REQ = 0,
|
|
/* Receiving write payload */
|
|
NBD_IO_RECV_PAYLOAD,
|
|
/* Transmitting or ready to transmit nbd response header */
|
|
NBD_IO_XMIT_RESP,
|
|
/* Transmitting read payload */
|
|
NBD_IO_XMIT_PAYLOAD,
|
|
};
|
|
|
|
struct nbd_io {
|
|
struct spdk_nbd_disk *nbd;
|
|
enum nbd_io_state_t state;
|
|
|
|
void *payload;
|
|
|
|
/* NOTE: for TRIM, this represents number of bytes to trim. */
|
|
uint32_t payload_size;
|
|
|
|
struct nbd_request req;
|
|
struct nbd_reply resp;
|
|
|
|
/*
|
|
* Tracks current progress on reading/writing a request,
|
|
* response, or payload from the nbd socket.
|
|
*/
|
|
uint32_t offset;
|
|
|
|
TAILQ_ENTRY(nbd_io) tailq;
|
|
};
|
|
|
|
enum nbd_disk_state_t {
|
|
NBD_DISK_STATE_RUNNING = 0,
|
|
/* soft disconnection caused by receiving nbd_cmd_disc */
|
|
NBD_DISK_STATE_SOFTDISC,
|
|
/* hard disconnection caused by mandatory conditions */
|
|
NBD_DISK_STATE_HARDDISC,
|
|
};
|
|
|
|
struct spdk_nbd_disk {
|
|
struct spdk_bdev *bdev;
|
|
struct spdk_bdev_desc *bdev_desc;
|
|
struct spdk_io_channel *ch;
|
|
int dev_fd;
|
|
char *nbd_path;
|
|
int kernel_sp_fd;
|
|
int spdk_sp_fd;
|
|
struct spdk_poller *nbd_poller;
|
|
uint32_t buf_align;
|
|
|
|
struct nbd_io *io_in_recv;
|
|
TAILQ_HEAD(, nbd_io) received_io_list;
|
|
TAILQ_HEAD(, nbd_io) executed_io_list;
|
|
|
|
enum nbd_disk_state_t state;
|
|
/* count of nbd_io in spdk_nbd_disk */
|
|
int io_count;
|
|
|
|
TAILQ_ENTRY(spdk_nbd_disk) tailq;
|
|
};
|
|
|
|
struct spdk_nbd_disk_globals {
|
|
TAILQ_HEAD(, spdk_nbd_disk) disk_head;
|
|
};
|
|
|
|
static struct spdk_nbd_disk_globals g_spdk_nbd;
|
|
|
|
int
|
|
spdk_nbd_init(void)
|
|
{
|
|
TAILQ_INIT(&g_spdk_nbd.disk_head);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
spdk_nbd_fini(void)
|
|
{
|
|
struct spdk_nbd_disk *nbd_idx, *nbd_tmp;
|
|
|
|
/*
|
|
* Stop running spdk_nbd_disk.
|
|
* Here, nbd removing are unnecessary, but _SAFE variant
|
|
* is needed, since internal spdk_nbd_disk_unregister will
|
|
* remove nbd from TAILQ.
|
|
*/
|
|
TAILQ_FOREACH_SAFE(nbd_idx, &g_spdk_nbd.disk_head, tailq, nbd_tmp) {
|
|
spdk_nbd_stop(nbd_idx);
|
|
}
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_disk_register(struct spdk_nbd_disk *nbd)
|
|
{
|
|
if (spdk_nbd_disk_find_by_nbd_path(nbd->nbd_path)) {
|
|
SPDK_NOTICELOG("%s is already exported\n", nbd->nbd_path);
|
|
return -1;
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&g_spdk_nbd.disk_head, nbd, tailq);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
spdk_nbd_disk_unregister(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct spdk_nbd_disk *nbd_idx, *nbd_tmp;
|
|
|
|
/*
|
|
* nbd disk may be stopped before registered.
|
|
* check whether it was registered.
|
|
*/
|
|
TAILQ_FOREACH_SAFE(nbd_idx, &g_spdk_nbd.disk_head, tailq, nbd_tmp) {
|
|
if (nbd == nbd_idx) {
|
|
TAILQ_REMOVE(&g_spdk_nbd.disk_head, nbd_idx, tailq);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
struct spdk_nbd_disk *
|
|
spdk_nbd_disk_find_by_nbd_path(const char *nbd_path)
|
|
{
|
|
struct spdk_nbd_disk *nbd;
|
|
|
|
/*
|
|
* check whether nbd has already been registered by nbd path.
|
|
*/
|
|
TAILQ_FOREACH(nbd, &g_spdk_nbd.disk_head, tailq) {
|
|
if (!strcmp(nbd->nbd_path, nbd_path)) {
|
|
return nbd;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
struct spdk_nbd_disk *spdk_nbd_disk_first(void)
|
|
{
|
|
return TAILQ_FIRST(&g_spdk_nbd.disk_head);
|
|
}
|
|
|
|
struct spdk_nbd_disk *spdk_nbd_disk_next(struct spdk_nbd_disk *prev)
|
|
{
|
|
return TAILQ_NEXT(prev, tailq);
|
|
}
|
|
|
|
const char *
|
|
spdk_nbd_disk_get_nbd_path(struct spdk_nbd_disk *nbd)
|
|
{
|
|
return nbd->nbd_path;
|
|
}
|
|
|
|
const char *
|
|
spdk_nbd_disk_get_bdev_name(struct spdk_nbd_disk *nbd)
|
|
{
|
|
return spdk_bdev_get_name(nbd->bdev);
|
|
}
|
|
|
|
void
|
|
nbd_disconnect(struct spdk_nbd_disk *nbd)
|
|
{
|
|
/*
|
|
* nbd soft-disconnection to terminate transmission phase.
|
|
* After receiving this ioctl command, nbd kernel module will send
|
|
* a NBD_CMD_DISC type io to nbd server in order to inform server.
|
|
*/
|
|
ioctl(nbd->dev_fd, NBD_DISCONNECT);
|
|
}
|
|
|
|
static struct nbd_io *
|
|
spdk_get_nbd_io(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct nbd_io *io;
|
|
|
|
io = calloc(1, sizeof(*io));
|
|
if (!io) {
|
|
SPDK_ERRLOG("Unable to get nbd_io\n");
|
|
abort();
|
|
}
|
|
|
|
io->nbd = nbd;
|
|
to_be32(&io->resp.magic, NBD_REPLY_MAGIC);
|
|
|
|
nbd->io_count++;
|
|
|
|
return io;
|
|
}
|
|
|
|
static void
|
|
spdk_put_nbd_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
|
|
{
|
|
if (io->payload) {
|
|
spdk_dma_free(io->payload);
|
|
}
|
|
free(io);
|
|
|
|
nbd->io_count--;
|
|
}
|
|
|
|
/*
|
|
* Check whether received nbd_io are all transmitted.
|
|
*
|
|
* \return 1 there is still some nbd_io not transmitted.
|
|
* 0 all nbd_io received are transmitted.
|
|
*/
|
|
static int
|
|
spdk_nbd_io_xmit_check(struct spdk_nbd_disk *nbd)
|
|
{
|
|
if (nbd->io_count == 0) {
|
|
return 0;
|
|
} else if (nbd->io_count == 1 && nbd->io_in_recv != NULL) {
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Check whether received nbd_io are all executed,
|
|
* and put back executed nbd_io instead of transmitting them
|
|
*
|
|
* \return 1 there is still some nbd_io under executing
|
|
* 0 all nbd_io gotten are freed.
|
|
*/
|
|
static int
|
|
spdk_nbd_cleanup_io(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct nbd_io *io, *io_tmp;
|
|
|
|
/* free io_in_recv */
|
|
if (nbd->io_in_recv != NULL) {
|
|
spdk_put_nbd_io(nbd, nbd->io_in_recv);
|
|
nbd->io_in_recv = NULL;
|
|
}
|
|
|
|
/* free io in received_io_list */
|
|
if (!TAILQ_EMPTY(&nbd->received_io_list)) {
|
|
TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) {
|
|
TAILQ_REMOVE(&nbd->received_io_list, io, tailq);
|
|
spdk_put_nbd_io(nbd, io);
|
|
}
|
|
}
|
|
|
|
/* free io in executed_io_list */
|
|
if (!TAILQ_EMPTY(&nbd->executed_io_list)) {
|
|
TAILQ_FOREACH_SAFE(io, &nbd->executed_io_list, tailq, io_tmp) {
|
|
TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
|
|
spdk_put_nbd_io(nbd, io);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Some nbd_io may be under executing in bdev.
|
|
* Wait for their done operation.
|
|
*/
|
|
if (nbd->io_count != 0) {
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
_nbd_stop(struct spdk_nbd_disk *nbd)
|
|
{
|
|
if (nbd->ch) {
|
|
spdk_put_io_channel(nbd->ch);
|
|
}
|
|
|
|
if (nbd->bdev_desc) {
|
|
spdk_bdev_close(nbd->bdev_desc);
|
|
}
|
|
|
|
if (nbd->nbd_path) {
|
|
free(nbd->nbd_path);
|
|
}
|
|
|
|
if (nbd->spdk_sp_fd >= 0) {
|
|
close(nbd->spdk_sp_fd);
|
|
}
|
|
|
|
if (nbd->kernel_sp_fd >= 0) {
|
|
close(nbd->kernel_sp_fd);
|
|
}
|
|
|
|
if (nbd->dev_fd >= 0) {
|
|
ioctl(nbd->dev_fd, NBD_CLEAR_QUE);
|
|
ioctl(nbd->dev_fd, NBD_CLEAR_SOCK);
|
|
close(nbd->dev_fd);
|
|
}
|
|
|
|
if (nbd->nbd_poller) {
|
|
spdk_poller_unregister(&nbd->nbd_poller);
|
|
}
|
|
|
|
spdk_nbd_disk_unregister(nbd);
|
|
|
|
free(nbd);
|
|
}
|
|
|
|
void
|
|
spdk_nbd_stop(struct spdk_nbd_disk *nbd)
|
|
{
|
|
if (nbd == NULL) {
|
|
return;
|
|
}
|
|
|
|
nbd->state = NBD_DISK_STATE_HARDDISC;
|
|
|
|
/*
|
|
* Stop action should be called only after all nbd_io are executed.
|
|
*/
|
|
if (!spdk_nbd_cleanup_io(nbd)) {
|
|
_nbd_stop(nbd);
|
|
}
|
|
}
|
|
|
|
static int64_t
|
|
read_from_socket(int fd, void *buf, size_t length)
|
|
{
|
|
ssize_t bytes_read;
|
|
|
|
bytes_read = read(fd, buf, length);
|
|
if (bytes_read == 0) {
|
|
return -EIO;
|
|
} else if (bytes_read == -1) {
|
|
if (errno != EAGAIN) {
|
|
return -errno;
|
|
}
|
|
return 0;
|
|
} else {
|
|
return bytes_read;
|
|
}
|
|
}
|
|
|
|
static int64_t
|
|
write_to_socket(int fd, void *buf, size_t length)
|
|
{
|
|
ssize_t bytes_written;
|
|
|
|
bytes_written = write(fd, buf, length);
|
|
if (bytes_written == 0) {
|
|
return -EIO;
|
|
} else if (bytes_written == -1) {
|
|
if (errno != EAGAIN) {
|
|
return -errno;
|
|
}
|
|
return 0;
|
|
} else {
|
|
return bytes_written;
|
|
}
|
|
}
|
|
|
|
static void
|
|
nbd_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
{
|
|
struct nbd_io *io = cb_arg;
|
|
struct spdk_nbd_disk *nbd = io->nbd;
|
|
|
|
if (success) {
|
|
io->resp.error = 0;
|
|
} else {
|
|
to_be32(&io->resp.error, EIO);
|
|
}
|
|
|
|
memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle));
|
|
TAILQ_INSERT_TAIL(&nbd->executed_io_list, io, tailq);
|
|
|
|
if (bdev_io != NULL) {
|
|
spdk_bdev_free_io(bdev_io);
|
|
}
|
|
|
|
if (nbd->state == NBD_DISK_STATE_HARDDISC && !spdk_nbd_cleanup_io(nbd)) {
|
|
_nbd_stop(nbd);
|
|
}
|
|
}
|
|
|
|
static int
|
|
nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io)
|
|
{
|
|
struct spdk_bdev_desc *desc = nbd->bdev_desc;
|
|
struct spdk_io_channel *ch = nbd->ch;
|
|
int rc = 0;
|
|
|
|
switch (from_be32(&io->req.type)) {
|
|
case NBD_CMD_READ:
|
|
rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from),
|
|
io->payload_size, nbd_io_done, io);
|
|
break;
|
|
case NBD_CMD_WRITE:
|
|
rc = spdk_bdev_write(desc, ch, io->payload, from_be64(&io->req.from),
|
|
io->payload_size, nbd_io_done, io);
|
|
break;
|
|
#ifdef NBD_FLAG_SEND_FLUSH
|
|
case NBD_CMD_FLUSH:
|
|
rc = spdk_bdev_flush(desc, ch, 0,
|
|
spdk_bdev_get_num_blocks(nbd->bdev) * spdk_bdev_get_block_size(nbd->bdev),
|
|
nbd_io_done, io);
|
|
break;
|
|
#endif
|
|
#ifdef NBD_FLAG_SEND_TRIM
|
|
case NBD_CMD_TRIM:
|
|
rc = spdk_bdev_unmap(desc, ch, from_be64(&io->req.from),
|
|
io->payload_size, nbd_io_done, io);
|
|
break;
|
|
#endif
|
|
case NBD_CMD_DISC:
|
|
spdk_put_nbd_io(nbd, io);
|
|
nbd->state = NBD_DISK_STATE_SOFTDISC;
|
|
break;
|
|
default:
|
|
rc = -1;
|
|
}
|
|
|
|
if (rc < 0) {
|
|
nbd_io_done(NULL, false, io);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_io_exec(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct nbd_io *io, *io_tmp;
|
|
int ret = 0;
|
|
|
|
/*
|
|
* For soft disconnection, nbd server must handle all outstanding
|
|
* request before closing connection.
|
|
*/
|
|
if (nbd->state == NBD_DISK_STATE_HARDDISC) {
|
|
return 0;
|
|
}
|
|
|
|
if (!TAILQ_EMPTY(&nbd->received_io_list)) {
|
|
TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) {
|
|
TAILQ_REMOVE(&nbd->received_io_list, io, tailq);
|
|
ret = nbd_submit_bdev_io(nbd, io);
|
|
if (ret < 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_io_recv_internal(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct nbd_io *io;
|
|
int ret = 0;
|
|
|
|
if (nbd->io_in_recv == NULL) {
|
|
nbd->io_in_recv = spdk_get_nbd_io(nbd);
|
|
}
|
|
|
|
io = nbd->io_in_recv;
|
|
|
|
if (io->state == NBD_IO_RECV_REQ) {
|
|
ret = read_from_socket(nbd->spdk_sp_fd, (char *)&io->req + io->offset,
|
|
sizeof(io->req) - io->offset);
|
|
if (ret < 0) {
|
|
spdk_put_nbd_io(nbd, io);
|
|
nbd->io_in_recv = NULL;
|
|
return ret;
|
|
}
|
|
|
|
io->offset += ret;
|
|
|
|
/* request is fully received */
|
|
if (io->offset == sizeof(io->req)) {
|
|
io->offset = 0;
|
|
|
|
/* req magic check */
|
|
if (from_be32(&io->req.magic) != NBD_REQUEST_MAGIC) {
|
|
SPDK_ERRLOG("invalid request magic\n");
|
|
spdk_put_nbd_io(nbd, io);
|
|
nbd->io_in_recv = NULL;
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* io except read/write should ignore payload */
|
|
if (from_be32(&io->req.type) == NBD_CMD_WRITE ||
|
|
from_be32(&io->req.type) == NBD_CMD_READ) {
|
|
io->payload_size = from_be32(&io->req.len);
|
|
} else {
|
|
io->payload_size = 0;
|
|
}
|
|
|
|
/* io payload allocate */
|
|
if (io->payload_size) {
|
|
io->payload = spdk_dma_malloc(io->payload_size, nbd->buf_align, NULL);
|
|
if (io->payload == NULL) {
|
|
SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size);
|
|
spdk_put_nbd_io(nbd, io);
|
|
nbd->io_in_recv = NULL;
|
|
return -ENOMEM;
|
|
}
|
|
} else {
|
|
io->payload = NULL;
|
|
}
|
|
|
|
/* next io step */
|
|
if (from_be32(&io->req.type) == NBD_CMD_WRITE) {
|
|
io->state = NBD_IO_RECV_PAYLOAD;
|
|
} else {
|
|
io->state = NBD_IO_XMIT_RESP;
|
|
nbd->io_in_recv = NULL;
|
|
TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (io->state == NBD_IO_RECV_PAYLOAD) {
|
|
ret = read_from_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset);
|
|
if (ret < 0) {
|
|
spdk_put_nbd_io(nbd, io);
|
|
nbd->io_in_recv = NULL;
|
|
return ret;
|
|
}
|
|
|
|
io->offset += ret;
|
|
|
|
/* request payload is fully received */
|
|
if (io->offset == io->payload_size) {
|
|
io->offset = 0;
|
|
io->state = NBD_IO_XMIT_RESP;
|
|
nbd->io_in_recv = NULL;
|
|
TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq);
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_io_recv(struct spdk_nbd_disk *nbd)
|
|
{
|
|
int i, ret = 0;
|
|
|
|
/*
|
|
* nbd server should not accept request in both soft and hard
|
|
* disconnect states.
|
|
*/
|
|
if (nbd->state != NBD_DISK_STATE_RUNNING) {
|
|
return 0;
|
|
}
|
|
|
|
for (i = 0; i < GET_IO_LOOP_COUNT; i++) {
|
|
ret = spdk_nbd_io_recv_internal(nbd);
|
|
if (ret != 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_io_xmit_internal(struct spdk_nbd_disk *nbd)
|
|
{
|
|
struct nbd_io *io;
|
|
int ret = 0;
|
|
|
|
io = TAILQ_FIRST(&nbd->executed_io_list);
|
|
if (io == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
/* resp error and handler are already set in io_done */
|
|
|
|
if (io->state == NBD_IO_XMIT_RESP) {
|
|
ret = write_to_socket(nbd->spdk_sp_fd, (char *)&io->resp + io->offset,
|
|
sizeof(io->resp) - io->offset);
|
|
if (ret <= 0) {
|
|
return ret;
|
|
}
|
|
|
|
io->offset += ret;
|
|
|
|
/* response is fully transmitted */
|
|
if (io->offset == sizeof(io->resp)) {
|
|
io->offset = 0;
|
|
|
|
/* transmit payload only when NBD_CMD_READ with no resp error */
|
|
if (from_be32(&io->req.type) != NBD_CMD_READ || io->resp.error != 0) {
|
|
TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
|
|
spdk_put_nbd_io(nbd, io);
|
|
return 0;
|
|
} else {
|
|
io->state = NBD_IO_XMIT_PAYLOAD;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (io->state == NBD_IO_XMIT_PAYLOAD) {
|
|
ret = write_to_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset);
|
|
if (ret <= 0) {
|
|
return ret;
|
|
}
|
|
|
|
io->offset += ret;
|
|
|
|
/* read payload is fully transmitted */
|
|
if (io->offset == io->payload_size) {
|
|
TAILQ_REMOVE(&nbd->executed_io_list, io, tailq);
|
|
spdk_put_nbd_io(nbd, io);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_io_xmit(struct spdk_nbd_disk *nbd)
|
|
{
|
|
int ret = 0;
|
|
|
|
/*
|
|
* For soft disconnection, nbd server must handle all outstanding
|
|
* request before closing connection.
|
|
*/
|
|
if (nbd->state == NBD_DISK_STATE_HARDDISC) {
|
|
return 0;
|
|
}
|
|
|
|
while (!TAILQ_EMPTY(&nbd->executed_io_list)) {
|
|
ret = spdk_nbd_io_xmit_internal(nbd);
|
|
if (ret != 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* For soft disconnection, nbd server can close connection after all
|
|
* outstanding request are transmitted.
|
|
*/
|
|
if (nbd->state == NBD_DISK_STATE_SOFTDISC && !spdk_nbd_io_xmit_check(nbd)) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Poll an NBD instance.
|
|
*
|
|
* \return 0 on success or negated errno values on error (e.g. connection closed).
|
|
*/
|
|
static int
|
|
_spdk_nbd_poll(struct spdk_nbd_disk *nbd)
|
|
{
|
|
int rc;
|
|
|
|
/* transmit executed io first */
|
|
rc = spdk_nbd_io_xmit(nbd);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
|
|
rc = spdk_nbd_io_recv(nbd);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
|
|
rc = spdk_nbd_io_exec(nbd);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
spdk_nbd_poll(void *arg)
|
|
{
|
|
struct spdk_nbd_disk *nbd = arg;
|
|
int rc;
|
|
|
|
rc = _spdk_nbd_poll(nbd);
|
|
if (rc < 0) {
|
|
SPDK_INFOLOG(SPDK_LOG_NBD, "spdk_nbd_poll() returned %s (%d); closing connection\n",
|
|
spdk_strerror(-rc), rc);
|
|
spdk_nbd_stop(nbd);
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static void *
|
|
nbd_start_kernel(void *arg)
|
|
{
|
|
int dev_fd = (int)(intptr_t)arg;
|
|
|
|
spdk_unaffinitize_thread();
|
|
|
|
/* This will block in the kernel until we close the spdk_sp_fd. */
|
|
ioctl(dev_fd, NBD_DO_IT);
|
|
|
|
pthread_exit(NULL);
|
|
}
|
|
|
|
static void
|
|
spdk_nbd_bdev_hot_remove(void *remove_ctx)
|
|
{
|
|
struct spdk_nbd_disk *nbd = remove_ctx;
|
|
|
|
spdk_nbd_stop(nbd);
|
|
}
|
|
|
|
struct spdk_nbd_disk *
|
|
spdk_nbd_start(const char *bdev_name, const char *nbd_path)
|
|
{
|
|
struct spdk_nbd_disk *nbd;
|
|
struct spdk_bdev *bdev;
|
|
pthread_t tid;
|
|
int rc;
|
|
int sp[2];
|
|
int flag;
|
|
|
|
bdev = spdk_bdev_get_by_name(bdev_name);
|
|
if (bdev == NULL) {
|
|
SPDK_ERRLOG("no bdev %s exists\n", bdev_name);
|
|
return NULL;
|
|
}
|
|
|
|
nbd = calloc(1, sizeof(*nbd));
|
|
if (nbd == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
nbd->dev_fd = -1;
|
|
nbd->spdk_sp_fd = -1;
|
|
nbd->kernel_sp_fd = -1;
|
|
|
|
rc = spdk_bdev_open(bdev, true, spdk_nbd_bdev_hot_remove, nbd, &nbd->bdev_desc);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("could not open bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc);
|
|
goto err;
|
|
}
|
|
|
|
nbd->bdev = bdev;
|
|
|
|
nbd->ch = spdk_bdev_get_io_channel(nbd->bdev_desc);
|
|
nbd->buf_align = spdk_max(spdk_bdev_get_buf_align(bdev), 64);
|
|
|
|
rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("socketpair failed\n");
|
|
goto err;
|
|
}
|
|
|
|
nbd->spdk_sp_fd = sp[0];
|
|
nbd->kernel_sp_fd = sp[1];
|
|
nbd->nbd_path = strdup(nbd_path);
|
|
if (!nbd->nbd_path) {
|
|
SPDK_ERRLOG("strdup allocation failure\n");
|
|
goto err;
|
|
}
|
|
|
|
TAILQ_INIT(&nbd->received_io_list);
|
|
TAILQ_INIT(&nbd->executed_io_list);
|
|
|
|
/* Add nbd_disk to the end of disk list */
|
|
rc = spdk_nbd_disk_register(nbd);
|
|
if (rc != 0) {
|
|
goto err;
|
|
}
|
|
|
|
nbd->dev_fd = open(nbd_path, O_RDWR);
|
|
if (nbd->dev_fd == -1) {
|
|
SPDK_ERRLOG("open(\"%s\") failed: %s\n", nbd_path, spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
rc = ioctl(nbd->dev_fd, NBD_SET_BLKSIZE, spdk_bdev_get_block_size(bdev));
|
|
if (rc == -1) {
|
|
SPDK_ERRLOG("ioctl(NBD_SET_BLKSIZE) failed: %s\n", spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
rc = ioctl(nbd->dev_fd, NBD_SET_SIZE_BLOCKS, spdk_bdev_get_num_blocks(bdev));
|
|
if (rc == -1) {
|
|
SPDK_ERRLOG("ioctl(NBD_SET_SIZE_BLOCKS) failed: %s\n", spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
rc = ioctl(nbd->dev_fd, NBD_CLEAR_SOCK);
|
|
if (rc == -1) {
|
|
SPDK_ERRLOG("ioctl(NBD_CLEAR_SOCK) failed: %s\n", spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
SPDK_INFOLOG(SPDK_LOG_NBD, "Enabling kernel access to bdev %s via %s\n",
|
|
spdk_bdev_get_name(bdev), nbd_path);
|
|
|
|
rc = ioctl(nbd->dev_fd, NBD_SET_SOCK, nbd->kernel_sp_fd);
|
|
if (rc == -1) {
|
|
SPDK_ERRLOG("ioctl(NBD_SET_SOCK) failed: %s\n", spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
#ifdef NBD_FLAG_SEND_TRIM
|
|
rc = ioctl(nbd->dev_fd, NBD_SET_FLAGS, NBD_FLAG_SEND_TRIM);
|
|
if (rc == -1) {
|
|
SPDK_ERRLOG("ioctl(NBD_SET_FLAGS) failed: %s\n", spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
#endif
|
|
|
|
rc = pthread_create(&tid, NULL, nbd_start_kernel, (void *)(intptr_t)nbd->dev_fd);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("could not create thread: %s\n", spdk_strerror(rc));
|
|
goto err;
|
|
}
|
|
|
|
rc = pthread_detach(tid);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("could not detach thread for nbd kernel: %s\n", spdk_strerror(rc));
|
|
goto err;
|
|
}
|
|
|
|
flag = fcntl(nbd->spdk_sp_fd, F_GETFL);
|
|
if (fcntl(nbd->spdk_sp_fd, F_SETFL, flag | O_NONBLOCK) < 0) {
|
|
SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n",
|
|
nbd->spdk_sp_fd, spdk_strerror(errno));
|
|
goto err;
|
|
}
|
|
|
|
nbd->nbd_poller = spdk_poller_register(spdk_nbd_poll, nbd, 0);
|
|
|
|
return nbd;
|
|
|
|
err:
|
|
spdk_nbd_stop(nbd);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
SPDK_LOG_REGISTER_COMPONENT("nbd", SPDK_LOG_NBD)
|