nbd: move nbd code to a new library
This prepares for enabling nbd mounting as a service within existing applications. Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: I225ae20dc86b2d70a1a39a325e754b22f34feaee Reviewed-on: https://review.gerrithub.io/369674 Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com> Tested-by: SPDK Automated Test System <sys_sgsw@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
62d3b6897a
commit
7e9f556363
42
include/spdk/nbd.h
Normal file
42
include/spdk/nbd.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*-
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright (c) Intel Corporation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SPDK_NBD_H_
|
||||
#define SPDK_NBD_H_
|
||||
|
||||
struct spdk_bdev;
|
||||
|
||||
int spdk_nbd_start(struct spdk_bdev *bdev, const char *nbd_path);
|
||||
void spdk_nbd_stop(void);
|
||||
|
||||
#endif
|
@ -38,6 +38,7 @@ DIRS-y += bdev blob blobfs conf copy cunit event json jsonrpc \
|
||||
log env_dpdk net rpc trace util nvme nvmf scsi ioat \
|
||||
ut_mock iscsi
|
||||
ifeq ($(OS),Linux)
|
||||
DIRS-y += nbd
|
||||
DIRS-$(CONFIG_VHOST) += vhost
|
||||
endif
|
||||
|
||||
|
40
lib/nbd/Makefile
Normal file
40
lib/nbd/Makefile
Normal file
@ -0,0 +1,40 @@
|
||||
#
|
||||
# BSD LICENSE
|
||||
#
|
||||
# Copyright (c) Intel Corporation.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
|
||||
|
||||
LIBNAME = nbd
|
||||
C_SRCS = nbd.c
|
||||
|
||||
include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
|
413
lib/nbd/nbd.c
Normal file
413
lib/nbd/nbd.c
Normal file
@ -0,0 +1,413 @@
|
||||
/*-
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright (c) Intel Corporation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "spdk/stdinc.h"
|
||||
|
||||
#include <linux/nbd.h>
|
||||
|
||||
#include "spdk/nbd.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/endian.h"
|
||||
#include "spdk/env.h"
|
||||
#include "spdk/event.h"
|
||||
#include "spdk/log.h"
|
||||
#include "spdk/util.h"
|
||||
#include "spdk/io_channel.h"
|
||||
|
||||
struct nbd_io {
|
||||
enum spdk_bdev_io_type type;
|
||||
void *payload;
|
||||
|
||||
/* NOTE: for TRIM, this represents number of bytes to trim. */
|
||||
uint32_t payload_size;
|
||||
|
||||
bool payload_in_progress;
|
||||
|
||||
struct nbd_request req;
|
||||
bool req_in_progress;
|
||||
|
||||
struct nbd_reply resp;
|
||||
bool resp_in_progress;
|
||||
|
||||
struct spdk_scsi_unmap_bdesc unmap;
|
||||
|
||||
/*
|
||||
* Tracks current progress on reading/writing a request,
|
||||
* response, or payload from the nbd socket.
|
||||
*/
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
struct nbd_disk {
|
||||
struct spdk_bdev *bdev;
|
||||
struct spdk_bdev_desc *bdev_desc;
|
||||
struct spdk_io_channel *ch;
|
||||
int fd;
|
||||
struct spdk_poller *poller;
|
||||
struct nbd_io io;
|
||||
uint32_t buf_align;
|
||||
};
|
||||
|
||||
struct nbd_disk g_nbd_disk = {};
|
||||
|
||||
static bool
|
||||
is_read(enum spdk_bdev_io_type io_type)
|
||||
{
|
||||
if (io_type == SPDK_BDEV_IO_TYPE_READ) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_write(enum spdk_bdev_io_type io_type)
|
||||
{
|
||||
switch (io_type) {
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
spdk_nbd_stop(void)
|
||||
{
|
||||
spdk_put_io_channel(g_nbd_disk.ch);
|
||||
spdk_bdev_close(g_nbd_disk.bdev_desc);
|
||||
close(g_nbd_disk.fd);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_from_socket(int fd, void *buf, size_t length)
|
||||
{
|
||||
ssize_t bytes_read;
|
||||
|
||||
bytes_read = read(fd, buf, length);
|
||||
if (bytes_read == 0) {
|
||||
spdk_app_stop(-1);
|
||||
return 0;
|
||||
} else if (bytes_read == -1) {
|
||||
if (errno != EAGAIN) {
|
||||
spdk_app_stop(-1);
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return bytes_read;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
write_to_socket(int fd, void *buf, size_t length)
|
||||
{
|
||||
ssize_t bytes_written;
|
||||
|
||||
bytes_written = write(fd, buf, length);
|
||||
if (bytes_written == 0) {
|
||||
spdk_app_stop(-1);
|
||||
return 0;
|
||||
} else if (bytes_written == -1) {
|
||||
if (errno != EAGAIN) {
|
||||
spdk_app_stop(-1);
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return bytes_written;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
struct nbd_io *io = cb_arg;
|
||||
|
||||
if (success) {
|
||||
io->resp.error = 0;
|
||||
} else {
|
||||
to_be32(&io->resp.error, EIO);
|
||||
}
|
||||
io->resp_in_progress = true;
|
||||
if (bdev_io != NULL) {
|
||||
spdk_bdev_free_io(bdev_io);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_submit_bdev_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
|
||||
struct spdk_io_channel *ch, struct nbd_io *io)
|
||||
{
|
||||
int rc;
|
||||
|
||||
switch (io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from),
|
||||
io->payload_size, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
rc = spdk_bdev_write(desc, ch, io->payload, from_be64(&io->req.from),
|
||||
io->payload_size, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||
to_be64(&io->unmap.lba, from_be64(&io->req.from) / spdk_bdev_get_block_size(bdev));
|
||||
to_be32(&io->unmap.block_count, io->payload_size / spdk_bdev_get_block_size(bdev));
|
||||
rc = spdk_bdev_unmap(desc, ch, &io->unmap, 1, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_FLUSH:
|
||||
rc = spdk_bdev_flush(desc, ch, 0, spdk_bdev_get_num_blocks(bdev) * spdk_bdev_get_block_size(bdev),
|
||||
nbd_io_done, io);
|
||||
break;
|
||||
default:
|
||||
rc = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (rc == -1) {
|
||||
nbd_io_done(NULL, false, io);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
process_request(struct nbd_disk *nbd)
|
||||
{
|
||||
struct nbd_io *io = &nbd->io;
|
||||
|
||||
memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle));
|
||||
io->resp.error = 0;
|
||||
io->offset = 0;
|
||||
|
||||
io->payload_size = from_be32(&io->req.len);
|
||||
spdk_dma_free(io->payload);
|
||||
io->payload = spdk_dma_malloc(io->payload_size, nbd->buf_align, NULL);
|
||||
if (io->payload == NULL) {
|
||||
SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size);
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(from_be32(&io->req.magic) == NBD_REQUEST_MAGIC);
|
||||
|
||||
switch (from_be32(&io->req.type)) {
|
||||
case NBD_CMD_READ:
|
||||
io->type = SPDK_BDEV_IO_TYPE_READ;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
case NBD_CMD_WRITE:
|
||||
io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
||||
io->payload_in_progress = true;
|
||||
break;
|
||||
case NBD_CMD_DISC:
|
||||
spdk_nbd_stop();
|
||||
return;
|
||||
#ifdef NBD_FLAG_SEND_FLUSH
|
||||
case NBD_CMD_FLUSH:
|
||||
io->type = SPDK_BDEV_IO_TYPE_FLUSH;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
#endif
|
||||
#ifdef NBD_FLAG_SEND_TRIM
|
||||
case NBD_CMD_TRIM:
|
||||
io->type = SPDK_BDEV_IO_TYPE_UNMAP;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_poll(void *arg)
|
||||
{
|
||||
struct nbd_disk *nbd = arg;
|
||||
struct nbd_io *io = &nbd->io;
|
||||
int fd = nbd->fd;
|
||||
uint64_t ret;
|
||||
|
||||
if (io->req_in_progress) {
|
||||
ret = read_from_socket(fd, (char *)&io->req + io->offset, sizeof(io->req) - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == sizeof(io->req)) {
|
||||
io->req_in_progress = false;
|
||||
process_request(nbd);
|
||||
}
|
||||
}
|
||||
|
||||
if (io->payload_in_progress && is_write(io->type)) {
|
||||
ret = read_from_socket(fd, io->payload + io->offset, io->payload_size - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == io->payload_size) {
|
||||
io->payload_in_progress = false;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (io->resp_in_progress) {
|
||||
ret = write_to_socket(fd, (char *)&io->resp + io->offset, sizeof(io->resp) - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == sizeof(io->resp)) {
|
||||
io->resp_in_progress = false;
|
||||
if (is_read(io->type)) {
|
||||
io->payload_in_progress = true;
|
||||
} else {
|
||||
io->req_in_progress = true;
|
||||
}
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (io->payload_in_progress && is_read(io->type)) {
|
||||
ret = write_to_socket(fd, io->payload + io->offset, io->payload_size - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == io->payload_size) {
|
||||
io->payload_in_progress = false;
|
||||
io->req_in_progress = true;
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_start_kernel(int nbd_fd, int *sp)
|
||||
{
|
||||
int rc;
|
||||
|
||||
close(sp[0]);
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_SOCK, sp[1]);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_SOCK) failed: %s\n", strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
#ifdef NBD_FLAG_SEND_TRIM
|
||||
rc = ioctl(nbd_fd, NBD_SET_FLAGS, NBD_FLAG_SEND_TRIM);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_FLAGS) failed: %s\n", strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* This will block in the kernel until the client disconnects. */
|
||||
ioctl(nbd_fd, NBD_DO_IT);
|
||||
|
||||
ioctl(nbd_fd, NBD_CLEAR_QUE);
|
||||
ioctl(nbd_fd, NBD_CLEAR_SOCK);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int
|
||||
spdk_nbd_start(struct spdk_bdev *bdev, const char *nbd_path)
|
||||
{
|
||||
int rc;
|
||||
int sp[2], nbd_fd;
|
||||
|
||||
rc = spdk_bdev_open(bdev, true, NULL, NULL, &g_nbd_disk.bdev_desc);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("could not open bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
g_nbd_disk.bdev = bdev;
|
||||
g_nbd_disk.ch = spdk_bdev_get_io_channel(g_nbd_disk.bdev_desc);
|
||||
g_nbd_disk.buf_align = spdk_max(spdk_bdev_get_buf_align(bdev), 64);
|
||||
|
||||
rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("socketpair failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
nbd_fd = open(nbd_path, O_RDWR);
|
||||
if (nbd_fd == -1) {
|
||||
SPDK_ERRLOG("open(\"%s\") failed: %s\n", nbd_path, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_BLKSIZE, spdk_bdev_get_block_size(bdev));
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_BLKSIZE) failed: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_SIZE_BLOCKS, spdk_bdev_get_num_blocks(bdev));
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_SIZE_BLOCKS) failed: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_CLEAR_SOCK);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_CLEAR_SOCK) failed: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Enabling kernel access to bdev %s via %s\n", spdk_bdev_get_name(bdev), nbd_path);
|
||||
|
||||
rc = fork();
|
||||
|
||||
switch (rc) {
|
||||
case 0:
|
||||
nbd_start_kernel(nbd_fd, sp);
|
||||
break;
|
||||
case -1:
|
||||
SPDK_ERRLOG("could not fork: %s\n", strerror(errno));
|
||||
return -1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
close(sp[1]);
|
||||
|
||||
g_nbd_disk.fd = sp[0];
|
||||
fcntl(g_nbd_disk.fd, F_SETFL, O_NONBLOCK);
|
||||
|
||||
to_be32(&g_nbd_disk.io.resp.magic, NBD_REPLY_MAGIC);
|
||||
g_nbd_disk.io.req_in_progress = true;
|
||||
|
||||
spdk_poller_register(&g_nbd_disk.poller, nbd_poll, &g_nbd_disk, spdk_env_get_current_core(), 0);
|
||||
return 0;
|
||||
}
|
@ -42,7 +42,7 @@ C_SRCS := nbd.c
|
||||
|
||||
CFLAGS += -I. $(ENV_CFLAGS)
|
||||
|
||||
SPDK_LIB_LIST = event_bdev event_copy event_rpc
|
||||
SPDK_LIB_LIST = event_bdev event_copy event_rpc nbd
|
||||
SPDK_LIB_LIST += bdev bdev_rpc copy event trace log log_rpc conf util rpc jsonrpc json
|
||||
|
||||
LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
|
||||
|
@ -35,328 +35,30 @@
|
||||
|
||||
#include <linux/nbd.h>
|
||||
|
||||
#include "spdk/nbd.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/endian.h"
|
||||
#include "spdk/env.h"
|
||||
#include "spdk/event.h"
|
||||
#include "spdk/log.h"
|
||||
#include "spdk/util.h"
|
||||
#include "spdk/io_channel.h"
|
||||
|
||||
static char *g_bdev_name;
|
||||
static char *g_nbd_name = "/dev/nbd0";
|
||||
|
||||
/*
|
||||
* Used to determine how the I/O buffers should be aligned.
|
||||
* This alignment will be bumped up for blockdevs that
|
||||
* require alignment based on block length - for example,
|
||||
* AIO blockdevs.
|
||||
*/
|
||||
static size_t g_min_alignment = 64;
|
||||
|
||||
#include "../common.c"
|
||||
|
||||
struct nbd_io {
|
||||
enum spdk_bdev_io_type type;
|
||||
void *payload;
|
||||
|
||||
/* NOTE: for TRIM, this represents number of bytes to trim. */
|
||||
uint32_t payload_size;
|
||||
|
||||
bool payload_in_progress;
|
||||
|
||||
struct nbd_request req;
|
||||
bool req_in_progress;
|
||||
|
||||
struct nbd_reply resp;
|
||||
bool resp_in_progress;
|
||||
|
||||
struct spdk_scsi_unmap_bdesc unmap;
|
||||
|
||||
/*
|
||||
* Tracks current progress on reading/writing a request,
|
||||
* response, or payload from the nbd socket.
|
||||
*/
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
struct nbd_disk {
|
||||
struct spdk_bdev *bdev;
|
||||
struct spdk_bdev_desc *bdev_desc;
|
||||
struct spdk_io_channel *ch;
|
||||
int fd;
|
||||
struct spdk_poller *poller;
|
||||
struct nbd_io io;
|
||||
};
|
||||
|
||||
struct nbd_disk g_nbd_disk = {};
|
||||
|
||||
static bool
|
||||
is_read(enum spdk_bdev_io_type io_type)
|
||||
{
|
||||
if (io_type == SPDK_BDEV_IO_TYPE_READ) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_write(enum spdk_bdev_io_type io_type)
|
||||
{
|
||||
switch (io_type) {
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_shutdown(void)
|
||||
{
|
||||
spdk_put_io_channel(g_nbd_disk.ch);
|
||||
spdk_bdev_close(g_nbd_disk.bdev_desc);
|
||||
close(g_nbd_disk.fd);
|
||||
spdk_nbd_stop();
|
||||
spdk_app_stop(0);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_from_socket(int fd, void *buf, size_t length)
|
||||
{
|
||||
ssize_t bytes_read;
|
||||
|
||||
bytes_read = read(fd, buf, length);
|
||||
if (bytes_read == 0) {
|
||||
spdk_app_stop(-1);
|
||||
return 0;
|
||||
} else if (bytes_read == -1) {
|
||||
if (errno != EAGAIN) {
|
||||
spdk_app_stop(-1);
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return bytes_read;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
write_to_socket(int fd, void *buf, size_t length)
|
||||
{
|
||||
ssize_t bytes_written;
|
||||
|
||||
bytes_written = write(fd, buf, length);
|
||||
if (bytes_written == 0) {
|
||||
spdk_app_stop(-1);
|
||||
return 0;
|
||||
} else if (bytes_written == -1) {
|
||||
if (errno != EAGAIN) {
|
||||
spdk_app_stop(-1);
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return bytes_written;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
struct nbd_io *io = cb_arg;
|
||||
|
||||
if (success) {
|
||||
io->resp.error = 0;
|
||||
} else {
|
||||
to_be32(&io->resp.error, EIO);
|
||||
}
|
||||
io->resp_in_progress = true;
|
||||
if (bdev_io != NULL) {
|
||||
spdk_bdev_free_io(bdev_io);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_submit_bdev_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
|
||||
struct spdk_io_channel *ch, struct nbd_io *io)
|
||||
{
|
||||
int rc;
|
||||
|
||||
switch (io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from),
|
||||
io->payload_size, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
rc = spdk_bdev_write(desc, ch, io->payload, from_be64(&io->req.from),
|
||||
io->payload_size, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||
to_be64(&io->unmap.lba, from_be64(&io->req.from) / spdk_bdev_get_block_size(bdev));
|
||||
to_be32(&io->unmap.block_count, io->payload_size / spdk_bdev_get_block_size(bdev));
|
||||
rc = spdk_bdev_unmap(desc, ch, &io->unmap, 1, nbd_io_done, io);
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_FLUSH:
|
||||
rc = spdk_bdev_flush(desc, ch, 0, spdk_bdev_get_num_blocks(bdev) * spdk_bdev_get_block_size(bdev),
|
||||
nbd_io_done, io);
|
||||
break;
|
||||
default:
|
||||
rc = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (rc == -1) {
|
||||
nbd_io_done(NULL, false, io);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
process_request(struct nbd_disk *nbd)
|
||||
{
|
||||
struct nbd_io *io = &nbd->io;
|
||||
|
||||
memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle));
|
||||
io->resp.error = 0;
|
||||
io->offset = 0;
|
||||
|
||||
io->payload_size = from_be32(&io->req.len);
|
||||
spdk_dma_free(io->payload);
|
||||
io->payload = spdk_dma_malloc(io->payload_size, g_min_alignment, NULL);
|
||||
if (io->payload == NULL) {
|
||||
SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size);
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(from_be32(&io->req.magic) == NBD_REQUEST_MAGIC);
|
||||
|
||||
switch (from_be32(&io->req.type)) {
|
||||
case NBD_CMD_READ:
|
||||
io->type = SPDK_BDEV_IO_TYPE_READ;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
case NBD_CMD_WRITE:
|
||||
io->type = SPDK_BDEV_IO_TYPE_WRITE;
|
||||
io->payload_in_progress = true;
|
||||
break;
|
||||
case NBD_CMD_DISC:
|
||||
nbd_shutdown();
|
||||
return;
|
||||
#ifdef NBD_FLAG_SEND_FLUSH
|
||||
case NBD_CMD_FLUSH:
|
||||
io->type = SPDK_BDEV_IO_TYPE_FLUSH;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
#endif
|
||||
#ifdef NBD_FLAG_SEND_TRIM
|
||||
case NBD_CMD_TRIM:
|
||||
io->type = SPDK_BDEV_IO_TYPE_UNMAP;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_poll(void *arg)
|
||||
{
|
||||
struct nbd_disk *nbd = arg;
|
||||
struct nbd_io *io = &nbd->io;
|
||||
int fd = nbd->fd;
|
||||
uint64_t ret;
|
||||
|
||||
if (io->req_in_progress) {
|
||||
ret = read_from_socket(fd, (char *)&io->req + io->offset, sizeof(io->req) - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == sizeof(io->req)) {
|
||||
io->req_in_progress = false;
|
||||
process_request(nbd);
|
||||
}
|
||||
}
|
||||
|
||||
if (io->payload_in_progress && is_write(io->type)) {
|
||||
ret = read_from_socket(fd, io->payload + io->offset, io->payload_size - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == io->payload_size) {
|
||||
io->payload_in_progress = false;
|
||||
nbd_submit_bdev_io(nbd->bdev, nbd->bdev_desc, nbd->ch, io);
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (io->resp_in_progress) {
|
||||
ret = write_to_socket(fd, (char *)&io->resp + io->offset, sizeof(io->resp) - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == sizeof(io->resp)) {
|
||||
io->resp_in_progress = false;
|
||||
if (is_read(io->type)) {
|
||||
io->payload_in_progress = true;
|
||||
} else {
|
||||
io->req_in_progress = true;
|
||||
}
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (io->payload_in_progress && is_read(io->type)) {
|
||||
ret = write_to_socket(fd, io->payload + io->offset, io->payload_size - io->offset);
|
||||
if (ret == 0) {
|
||||
return;
|
||||
}
|
||||
io->offset += ret;
|
||||
if (io->offset == io->payload_size) {
|
||||
io->payload_in_progress = false;
|
||||
io->req_in_progress = true;
|
||||
io->offset = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_start_kernel(int nbd_fd, int *sp)
|
||||
{
|
||||
int rc;
|
||||
|
||||
close(sp[0]);
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_SOCK, sp[1]);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_SOCK) failed: %s\n", strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
#ifdef NBD_FLAG_SEND_TRIM
|
||||
rc = ioctl(nbd_fd, NBD_SET_FLAGS, NBD_FLAG_SEND_TRIM);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_FLAGS) failed: %s\n", strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* This will block in the kernel until the client disconnects. */
|
||||
ioctl(nbd_fd, NBD_DO_IT);
|
||||
|
||||
ioctl(nbd_fd, NBD_CLEAR_QUE);
|
||||
ioctl(nbd_fd, NBD_CLEAR_SOCK);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static void
|
||||
nbd_start(void *arg1, void *arg2)
|
||||
{
|
||||
struct spdk_bdev *bdev;
|
||||
int rc;
|
||||
int sp[2], nbd_fd;
|
||||
int rc;
|
||||
|
||||
bdev = spdk_bdev_get_by_name(g_bdev_name);
|
||||
if (bdev == NULL) {
|
||||
@ -365,78 +67,11 @@ nbd_start(void *arg1, void *arg2)
|
||||
return;
|
||||
}
|
||||
|
||||
rc = spdk_bdev_open(bdev, true, NULL, NULL, &g_nbd_disk.bdev_desc);
|
||||
rc = spdk_nbd_start(bdev, g_nbd_name);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("could not open bdev %s, error=%d\n", g_bdev_name, rc);
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
g_nbd_disk.bdev = bdev;
|
||||
g_min_alignment = spdk_max(g_min_alignment, spdk_bdev_get_buf_align(bdev));
|
||||
g_nbd_disk.ch = spdk_bdev_get_io_channel(g_nbd_disk.bdev_desc);
|
||||
|
||||
rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("socketpair failed\n");
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
nbd_fd = open(g_nbd_name, O_RDWR);
|
||||
if (nbd_fd == -1) {
|
||||
SPDK_ERRLOG("open(\"%s\") failed: %s\n", g_nbd_name, strerror(errno));
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_BLKSIZE, spdk_bdev_get_block_size(bdev));
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_BLKSIZE) failed: %s\n", strerror(errno));
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_SET_SIZE_BLOCKS, spdk_bdev_get_num_blocks(bdev));
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_SET_SIZE_BLOCKS) failed: %s\n", strerror(errno));
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
rc = ioctl(nbd_fd, NBD_CLEAR_SOCK);
|
||||
if (rc == -1) {
|
||||
SPDK_ERRLOG("ioctl(NBD_CLEAR_SOCK) failed: %s\n", strerror(errno));
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
}
|
||||
|
||||
printf("Enabling kernel access to bdev %s via %s\n", g_bdev_name, g_nbd_name);
|
||||
|
||||
rc = fork();
|
||||
|
||||
switch (rc) {
|
||||
case 0:
|
||||
nbd_start_kernel(nbd_fd, sp);
|
||||
break;
|
||||
case -1:
|
||||
SPDK_ERRLOG("could not fork: %s\n", strerror(errno));
|
||||
spdk_app_stop(-1);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
close(sp[1]);
|
||||
|
||||
g_nbd_disk.fd = sp[0];
|
||||
fcntl(g_nbd_disk.fd, F_SETFL, O_NONBLOCK);
|
||||
|
||||
to_be32(&g_nbd_disk.io.resp.magic, NBD_REPLY_MAGIC);
|
||||
g_nbd_disk.io.req_in_progress = true;
|
||||
|
||||
spdk_poller_register(&g_nbd_disk.poller, nbd_poll, &g_nbd_disk, spdk_env_get_current_core(), 0);
|
||||
|
||||
}
|
||||
|
||||
static void usage(char *program_name)
|
||||
|
Loading…
Reference in New Issue
Block a user