When a bdev is being unregistered, after all channels have been closed, the bdev layer calls the module's destruct callback for the bdev before calling the bdev unregister callback. For the rbd module, the destruct callback is bdev_rbd_destruct. This callback unregisters the rbd io_device which is an asynchronous operation. We need to return >0 from bdev_rbd_destruct to inform the bdev layer that this is an asynchronous operation, so that it does not immediately call the bdev unregister callback. Once the rbd io_device is unregistered, we can call spdk_bdev_destruct_done() which will trigger the bdev layer to finally call the bdev unregister callback. Without this fix, deleting an rbd bdev would complete before the backing cluster reference had been released. This meant that even if you had deleted all rbd bdevs, there might still be cluster references in place for a short period of time. It's better to wait to complete the delete operation until the cluster reference has been released to avoid this issue (which this patch now does). Fixes issue #2069. Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: I8ac156c89d3e235a95ef196308cc349e6078bfd7 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9115 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com>
1406 lines
32 KiB
C
1406 lines
32 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright (c) Intel Corporation.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "spdk/stdinc.h"
|
|
|
|
#include "bdev_rbd.h"
|
|
|
|
#include <rbd/librbd.h>
|
|
#include <rados/librados.h>
|
|
#include <sys/eventfd.h>
|
|
#include <sys/epoll.h>
|
|
|
|
#include "spdk/env.h"
|
|
#include "spdk/bdev.h"
|
|
#include "spdk/thread.h"
|
|
#include "spdk/json.h"
|
|
#include "spdk/string.h"
|
|
#include "spdk/util.h"
|
|
#include "spdk/likely.h"
|
|
|
|
#include "spdk/bdev_module.h"
|
|
#include "spdk/log.h"
|
|
|
|
#define SPDK_RBD_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 128

/* Module-wide counter of created rbd bdevs (usage not visible in this chunk —
 * presumably used to generate default bdev names; confirm against bdev_rbd_create). */
static int bdev_rbd_count = 0;

/* Per-bdev state for one exported RBD image. */
struct bdev_rbd {
	struct spdk_bdev disk;		/* Base bdev object registered with the bdev layer. */
	char *rbd_name;			/* Name of the RBD image within the pool. */
	char *user_id;			/* Ceph user id; NULL means the default ("admin"). */
	char *pool_name;		/* Rados pool that holds the image. */
	char **config;			/* Flat key/value array of rados conf overrides. */

	rados_t cluster;		/* Private cluster connection (used when cluster_name == NULL). */
	rados_t *cluster_p;		/* Points at 'cluster' above, or at a shared registered cluster. */
	char *cluster_name;		/* Non-NULL when this bdev uses a shared registered cluster. */

	rados_ioctx_t io_ctx;		/* Rados I/O context created on pool_name. */
	rbd_image_t image;		/* Open image handle (valid while channels exist). */
	int pfd;			/* eventfd wired to rbd image notifications. */

	rbd_image_info_t info;		/* Image geometry cached from rbd_stat(). */
	pthread_mutex_t mutex;		/* Serializes ch_count / main_td transitions. */
	struct spdk_thread *main_td;	/* Thread that owns the image/eventfd resources. */
	struct spdk_thread *destruct_td;	/* Thread that initiated destruct; completion is sent back here. */
	uint32_t ch_count;		/* Number of open per-thread I/O channels. */
	struct bdev_rbd_group_channel *group_ch;	/* Module-level poll group this disk is attached to. */

	TAILQ_ENTRY(bdev_rbd) tailq;
	struct spdk_poller *reset_timer;	/* One-shot poller used to complete a reset. */
	struct spdk_bdev_io *reset_bdev_io;	/* Outstanding reset I/O, if any. */
};

/* Module-level channel: a single epoll set that polls the eventfds of all disks. */
struct bdev_rbd_group_channel {
	struct spdk_poller *poller;
	int epoll_fd;
};

/* Per-thread I/O channel context; only back-references the disk. */
struct bdev_rbd_io_channel {
	struct bdev_rbd *disk;
};

/* Per-I/O driver context stored in spdk_bdev_io->driver_ctx. */
struct bdev_rbd_io {
	struct spdk_thread *submit_td;	/* Submitting thread when it differs from main_td, else NULL. */
	enum spdk_bdev_io_status status;	/* Completion status staged before crossing threads. */
	size_t total_len;		/* Expected byte count for READs, checked at completion. */
};

/* A named, shareable rados cluster connection registered via RPC. */
struct bdev_rbd_cluster {
	char *name;
	char *user_id;
	char **config_param;		/* Flat key/value conf array; takes priority over config_file. */
	char *config_file;		/* Ceph conf file path, used when config_param is NULL. */
	rados_t cluster;
	uint32_t ref;			/* Number of bdevs currently holding this cluster. */
	STAILQ_ENTRY(bdev_rbd_cluster) link;
};

/* Global registry of shared clusters; every access is guarded by the mutex below. */
static STAILQ_HEAD(, bdev_rbd_cluster) g_map_bdev_rbd_cluster = STAILQ_HEAD_INITIALIZER(
			g_map_bdev_rbd_cluster);
static pthread_mutex_t g_map_bdev_rbd_cluster_mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
/* Release all memory owned by a cluster registry entry.
 * Note: does not rados_shutdown() the connection — callers that need that
 * (e.g. bdev_rbd_unregister_cluster) shut it down before freeing the entry.
 */
static void
bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry)
{
	assert(entry != NULL);

	bdev_rbd_free_config(entry->config_param);
	free(entry->config_file);
	free(entry->user_id);
	free(entry->name);
	free(entry);
}
|
|
|
|
/* Drop one reference on the shared cluster that *cluster points into, and
 * NULL out the caller's pointer. The entry itself stays in the registry even
 * at ref == 0; it is only torn down by bdev_rbd_unregister_cluster().
 */
static void
bdev_rbd_put_cluster(rados_t **cluster)
{
	struct bdev_rbd_cluster *entry;

	assert(cluster != NULL);

	/* No need go through the map if *cluster equals to NULL */
	if (*cluster == NULL) {
		return;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		/* Match by address: *cluster was handed out as &entry->cluster. */
		if (*cluster != &entry->cluster) {
			continue;
		}

		assert(entry->ref > 0);
		entry->ref--;
		*cluster = NULL;
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return;
	}

	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
	SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster);
}
|
|
|
|
/* Free a bdev_rbd and every resource it owns: strings, config array, rados
 * ioctx, and either the shared-cluster reference or the private cluster
 * connection. Safe to call with NULL.
 */
static void
bdev_rbd_free(struct bdev_rbd *rbd)
{
	if (!rbd) {
		return;
	}

	free(rbd->disk.name);
	free(rbd->rbd_name);
	free(rbd->user_id);
	free(rbd->pool_name);
	bdev_rbd_free_config(rbd->config);

	if (rbd->io_ctx) {
		rados_ioctx_destroy(rbd->io_ctx);
	}

	if (rbd->cluster_name) {
		/* Shared cluster: just drop our reference; the registry owns the connection. */
		bdev_rbd_put_cluster(&rbd->cluster_p);
		free(rbd->cluster_name);
	} else if (rbd->cluster) {
		/* Private cluster: we own the connection, shut it down. */
		rados_shutdown(rbd->cluster);
	}

	pthread_mutex_destroy(&rbd->mutex);
	free(rbd);
}
|
|
|
|
void
|
|
bdev_rbd_free_config(char **config)
|
|
{
|
|
char **entry;
|
|
|
|
if (config) {
|
|
for (entry = config; *entry; entry++) {
|
|
free(*entry);
|
|
}
|
|
free(config);
|
|
}
|
|
}
|
|
|
|
/* Deep-copy a NULL-terminated array of config strings. Returns a newly
 * allocated array the caller must release with bdev_rbd_free_config(), or
 * NULL if the input is NULL or any allocation fails (partial copies are
 * freed before returning).
 */
char **
bdev_rbd_dup_config(const char *const *config)
{
	size_t n = 0;
	size_t i;
	char **dup;

	if (config == NULL) {
		return NULL;
	}

	while (config[n] != NULL) {
		n++;
	}

	dup = calloc(n + 1, sizeof(*dup));
	if (dup == NULL) {
		return NULL;
	}

	for (i = 0; i < n; i++) {
		dup[i] = strdup(config[i]);
		if (dup[i] == NULL) {
			bdev_rbd_free_config(dup);
			return NULL;
		}
	}

	return dup;
}
|
|
|
|
/* Create and connect a private rados cluster handle for one bdev.
 *
 * config is a flat array of alternating key/value strings applied via
 * rados_conf_set(); when NULL, the default conf file search is used
 * (rados_conf_read_file with NULL path). Returns 0 on success, -1 on any
 * failure (the partially-created cluster is shut down before returning).
 */
static int
bdev_rados_cluster_init(const char *user_id, const char *const *config,
			rados_t *cluster)
{
	int ret;

	ret = rados_create(cluster, user_id);
	if (ret < 0) {
		SPDK_ERRLOG("Failed to create rados_t struct\n");
		return -1;
	}

	if (config) {
		/* Entries come in key/value pairs, hence the += 2 stride. */
		const char *const *entry = config;
		while (*entry) {
			ret = rados_conf_set(*cluster, entry[0], entry[1]);
			if (ret < 0) {
				SPDK_ERRLOG("Failed to set %s = %s\n", entry[0], entry[1]);
				rados_shutdown(*cluster);
				return -1;
			}
			entry += 2;
		}
	} else {
		ret = rados_conf_read_file(*cluster, NULL);
		if (ret < 0) {
			SPDK_ERRLOG("Failed to read conf file\n");
			rados_shutdown(*cluster);
			return -1;
		}
	}

	ret = rados_connect(*cluster);
	if (ret < 0) {
		SPDK_ERRLOG("Failed to connect to rbd_pool\n");
		rados_shutdown(*cluster);
		return -1;
	}

	return 0;
}
|
|
|
|
/* Look up a registered shared cluster by name and take a reference on it.
 * On success *cluster points at the registry entry's rados_t and 0 is
 * returned; -1 if cluster is NULL or no entry with that name exists.
 */
static int
bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster)
{
	struct bdev_rbd_cluster *entry;

	if (cluster == NULL) {
		SPDK_ERRLOG("cluster should not be NULL\n");
		return -1;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(cluster_name, entry->name) == 0) {
			/* Ref is taken under the registry lock; released in bdev_rbd_put_cluster(). */
			entry->ref++;
			*cluster = &entry->cluster;
			pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
			return 0;
		}
	}

	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
	return -1;
}
|
|
|
|
static int
|
|
bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster)
|
|
{
|
|
int ret;
|
|
|
|
ret = bdev_rbd_get_cluster(cluster_name, cluster);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("Failed to create rados_t struct\n");
|
|
return -1;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Trampoline run via spdk_call_unaffinitized(): create a private rados
 * cluster for the given bdev off the SPDK thread. Returns arg on success,
 * NULL on failure (matching the spdk_call_unaffinitized contract).
 */
static void *
bdev_rbd_cluster_handle(void *arg)
{
	void *ret = arg;
	struct bdev_rbd *rbd = arg;
	int rc;

	rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config,
				     &rbd->cluster);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to create rados cluster for user_id=%s and rbd_pool=%s\n",
			    rbd->user_id ? rbd->user_id : "admin (the default)", rbd->pool_name);
		ret = NULL;
	}

	return ret;
}
|
|
|
|
/* Trampoline run via spdk_call_unaffinitized(): create the rados ioctx and
 * stat the image once to cache its geometry in rbd->info. The image is
 * opened only transiently here and closed again; the long-lived open happens
 * per-channel in bdev_rbd_handle(). Returns arg on success, NULL on failure.
 * NOTE(review): on failure the ioctx is left in rbd->io_ctx — presumably
 * cleaned up later by bdev_rbd_free(); confirm against the create error path.
 */
static void *
bdev_rbd_init_context(void *arg)
{
	struct bdev_rbd *rbd = arg;
	int rc;

	if (rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &rbd->io_ctx) < 0) {
		SPDK_ERRLOG("Failed to create ioctx on rbd=%p\n", rbd);
		return NULL;
	}

	rc = rbd_open(rbd->io_ctx, rbd->rbd_name, &rbd->image, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to open specified rbd device\n");
		return NULL;
	}

	rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
	rbd_close(rbd->image);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to stat specified rbd device\n");
		return NULL;
	}

	return arg;
}
|
|
|
|
static int
|
|
bdev_rbd_init(struct bdev_rbd *rbd)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (!rbd->cluster_name) {
|
|
rbd->cluster_p = &rbd->cluster;
|
|
/* Cluster should be created in non-SPDK thread to avoid conflict between
|
|
* Rados and SPDK thread */
|
|
if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) {
|
|
SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd);
|
|
return -1;
|
|
}
|
|
} else {
|
|
ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n",
|
|
rbd, rbd->cluster_name);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (spdk_call_unaffinitized(bdev_rbd_init_context, rbd) == NULL) {
|
|
SPDK_ERRLOG("Cannot init rbd context for rbd=%p\n", rbd);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Flush any pending writes and close an open image handle. */
static void
bdev_rbd_exit(rbd_image_t image)
{
	rbd_flush(image);
	rbd_close(image);
}
|
|
|
|
/* librbd AIO completion callback. Intentionally empty: completions are
 * instead reaped in batches by bdev_rbd_io_poll() via rbd_poll_io_events(),
 * driven by the eventfd notification.
 */
static void
bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
{
	/* Doing nothing here */
}
|
|
|
|
/* Complete a bdev I/O with the status previously staged in rbd_io->status.
 * Runs on the thread that originally submitted the I/O.
 */
static void
_bdev_rbd_io_complete(void *_rbd_io)
{
	struct bdev_rbd_io *rbd_io = _rbd_io;

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(rbd_io), rbd_io->status);
}
|
|
|
|
/* Complete a bdev I/O, hopping back to the submitting thread when the I/O
 * was forwarded to the disk's main thread (submit_td != NULL); otherwise
 * complete inline.
 */
static void
bdev_rbd_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;

	rbd_io->status = status;
	if (rbd_io->submit_td != NULL) {
		spdk_thread_send_msg(rbd_io->submit_td, _bdev_rbd_io_complete, rbd_io);
	} else {
		_bdev_rbd_io_complete(rbd_io);
	}
}
|
|
|
|
/* Kick off an asynchronous librbd operation (read/readv, write/writev, or
 * flush) for the given bdev I/O. On any submission failure the completion
 * object is released and the I/O is failed immediately. Completions arrive
 * later via the eventfd and are reaped by bdev_rbd_io_poll().
 *
 * NOTE(review): if bdev_io->type matched none of READ/WRITE/FLUSH, ret would
 * keep the (successful) rbd_aio_create_completion() value and comp would be
 * leaked with the I/O never completed. The submit path only forwards those
 * three types here, so this appears unreachable — confirm if new types are added.
 */
static void
bdev_rbd_start_aio(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io,
		   struct iovec *iov, int iovcnt, uint64_t offset, size_t len)
{
	int ret;
	rbd_completion_t comp;
	struct bdev_rbd_io *rbd_io;
	rbd_image_t image = disk->image;

	ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
					&comp);
	if (ret < 0) {
		goto err;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
		/* Remember the expected length; the poller compares it against the
		 * completion's return value to detect short reads. */
		rbd_io->total_len = len;
		if (spdk_likely(iovcnt == 1)) {
			ret = rbd_aio_read(image, offset, iov[0].iov_len, iov[0].iov_base, comp);
		} else {
			ret = rbd_aio_readv(image, iov, iovcnt, offset, comp);
		}
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		if (spdk_likely(iovcnt == 1)) {
			ret = rbd_aio_write(image, offset, iov[0].iov_len, iov[0].iov_base, comp);
		} else {
			ret = rbd_aio_writev(image, iov, iovcnt, offset, comp);
		}
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
		ret = rbd_aio_flush(image, comp);
	}

	if (ret < 0) {
		rbd_aio_release(comp);
		goto err;
	}

	return;

err:
	bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
|
|
|
|
static int bdev_rbd_library_init(void);
|
|
|
|
static void bdev_rbd_library_fini(void);
|
|
|
|
/* Tell the bdev layer how much per-I/O driver context to allocate. */
static int
bdev_rbd_get_ctx_size(void)
{
	return sizeof(struct bdev_rbd_io);
}
|
|
|
|
/* Module descriptor registered with the bdev layer. */
static struct spdk_bdev_module rbd_if = {
	.name = "rbd",
	.module_init = bdev_rbd_library_init,
	.module_fini = bdev_rbd_library_fini,
	.get_ctx_size = bdev_rbd_get_ctx_size,

};
SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if)
|
|
|
|
/* One-shot poller fired after the reset grace period: complete the pending
 * reset I/O and tear the timer down.
 */
static int
bdev_rbd_reset_timer(void *arg)
{
	struct bdev_rbd *disk = arg;

	/*
	 * TODO: This should check if any I/O is still in flight before completing the reset.
	 * For now, just complete after the timer expires.
	 */
	bdev_rbd_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	spdk_poller_unregister(&disk->reset_timer);
	disk->reset_bdev_io = NULL;

	return SPDK_POLLER_BUSY;
}
|
|
|
|
/* Handle a RESET I/O by arming a 1-second timer; only one reset may be
 * outstanding at a time (asserted).
 */
static void
bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io)
{
	/*
	 * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a
	 * timer to wait for in-flight I/O to complete.
	 */
	assert(disk->reset_bdev_io == NULL);
	disk->reset_bdev_io = bdev_io;
	disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000);
}
|
|
|
|
/* Final step of the asynchronous destruct: runs on the thread that started
 * the destruct (destruct_td). Tells the bdev layer the destruct finished —
 * which triggers the user's unregister callback — then frees the bdev and
 * releases its cluster reference via bdev_rbd_free().
 */
static void
_bdev_rbd_destruct_done(void *io_device)
{
	struct bdev_rbd *rbd = io_device;

	assert(rbd != NULL);
	assert(rbd->ch_count == 0);

	spdk_bdev_destruct_done(&rbd->disk, 0);
	bdev_rbd_free(rbd);
}
|
|
|
|
/* spdk_io_device_unregister() completion callback. */
static void
bdev_rbd_free_cb(void *io_device)
{
	struct bdev_rbd *rbd = io_device;

	/* The io device has been unregistered. Send a message back to the
	 * original thread that started the destruct operation, so that the
	 * bdev unregister callback is invoked on the same thread that started
	 * this whole process.
	 */
	spdk_thread_send_msg(rbd->destruct_td, _bdev_rbd_destruct_done, rbd);
}
|
|
|
|
/* Runs on the rbd bdev's main thread: start the asynchronous io_device
 * unregistration; bdev_rbd_free_cb fires when it completes.
 */
static void
_bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *rbd = ctx;

	spdk_io_device_unregister(rbd, bdev_rbd_free_cb);
}
|
|
|
|
/* bdev layer destruct callback. Unregistering the io_device is asynchronous,
 * so this returns 1 (async); the bdev layer waits for spdk_bdev_destruct_done()
 * — called from _bdev_rbd_destruct_done() — before invoking the user's
 * unregister callback. This ensures the delete does not complete until the
 * backing cluster reference has been released.
 */
static int
bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *rbd = ctx;
	struct spdk_thread *td;

	/* main_td may be NULL if no channel was ever created; fall back to the
	 * current thread. */
	if (rbd->main_td == NULL) {
		td = spdk_get_thread();
	} else {
		td = rbd->main_td;
	}

	/* Start the destruct operation on the rbd bdev's
	 * main thread. This guarantees it will only start
	 * executing after any messages related to channel
	 * deletions have finished completing. *Always*
	 * send a message, even if this function gets called
	 * from the main thread, in case there are pending
	 * channel delete messages in flight to this thread.
	 */
	assert(rbd->destruct_td == NULL);
	rbd->destruct_td = td;
	spdk_thread_send_msg(td, _bdev_rbd_destruct, rbd);

	/* Return 1 to indicate the destruct path is asynchronous. */
	return 1;
}
|
|
|
|
/* spdk_bdev_io_get_buf() callback for READs: fail the I/O if buffer
 * allocation failed, otherwise start the librbd AIO.
 */
static void
bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	if (!success) {
		bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	bdev_rbd_start_aio(disk,
			   bdev_io,
			   bdev_io->u.bdev.iovs,
			   bdev_io->u.bdev.iovcnt,
			   bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
			   bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}
|
|
|
|
/* Dispatch a bdev I/O on the disk's main thread. READs go through the
 * buffer-allocation callback first; WRITE/FLUSH start the AIO directly;
 * RESET arms the reset timer; anything else is failed.
 */
static void
_bdev_rbd_submit_request(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_rbd_start_aio(disk,
				   bdev_io,
				   bdev_io->u.bdev.iovs,
				   bdev_io->u.bdev.iovcnt,
				   bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
				   bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt,
			       bdev_io);
		break;

	default:
		SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
		bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
|
|
|
|
/* bdev layer submit entry point. All I/O is funneled to the disk's main
 * thread; submit_td records where to send the completion back (NULL when
 * we are already on the main thread).
 */
static void
bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	if (disk->main_td != submit_td) {
		rbd_io->submit_td = submit_td;
		spdk_thread_send_msg(disk->main_td, _bdev_rbd_submit_request, bdev_io);
	} else {
		rbd_io->submit_td = NULL;
		_bdev_rbd_submit_request(bdev_io);
	}
}
|
|
|
|
static bool
|
|
bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
|
|
{
|
|
switch (io_type) {
|
|
case SPDK_BDEV_IO_TYPE_READ:
|
|
case SPDK_BDEV_IO_TYPE_WRITE:
|
|
case SPDK_BDEV_IO_TYPE_FLUSH:
|
|
case SPDK_BDEV_IO_TYPE_RESET:
|
|
return true;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/* Reap completed librbd AIOs for one disk (up to SPDK_RBD_QUEUE_DEPTH per
 * call) and complete the corresponding bdev I/Os. READs succeed only if the
 * returned byte count equals the requested length; other types succeed on a
 * zero return value. A negative rc from rbd_poll_io_events simply yields an
 * empty loop.
 */
static void
bdev_rbd_io_poll(struct bdev_rbd *disk)
{
	int i, io_status, rc;
	rbd_completion_t comps[SPDK_RBD_QUEUE_DEPTH];
	struct spdk_bdev_io *bdev_io;
	struct bdev_rbd_io *rbd_io;
	enum spdk_bdev_io_status bio_status;

	rc = rbd_poll_io_events(disk->image, comps, SPDK_RBD_QUEUE_DEPTH);
	for (i = 0; i < rc; i++) {
		bdev_io = rbd_aio_get_arg(comps[i]);
		rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
		io_status = rbd_aio_get_return_value(comps[i]);
		bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;

		if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
			/* Short (or failed) read: returned length must match the request. */
			if ((int)rbd_io->total_len != io_status) {
				bio_status = SPDK_BDEV_IO_STATUS_FAILED;
			}
		} else {
			/* For others, 0 means success */
			if (io_status != 0) {
				bio_status = SPDK_BDEV_IO_STATUS_FAILED;
			}
		}

		rbd_aio_release(comps[i]);

		bdev_rbd_io_complete(bdev_io, bio_status);
	}
}
|
|
|
|
/* Tear down the per-disk resources created by _bdev_rbd_create_cb(): detach
 * the eventfd from the group epoll set, drop the group channel reference,
 * flush/close the image, and close the eventfd. Must run on the disk's main
 * thread with no channels remaining (asserted).
 */
static void
bdev_rbd_free_channel_resources(struct bdev_rbd *disk)
{
	int rc;

	assert(disk != NULL);
	assert(disk->main_td == spdk_get_thread());
	assert(disk->ch_count == 0);

	assert(disk->group_ch != NULL);
	rc = epoll_ctl(disk->group_ch->epoll_fd, EPOLL_CTL_DEL,
		       disk->pfd, NULL);
	if (rc < 0) {
		/* Non-fatal: log and continue tearing down. */
		SPDK_ERRLOG("Failed to remove fd on disk=%p from the polling group=%p\n",
			    disk, disk->group_ch);
	}
	spdk_put_io_channel(spdk_io_channel_from_ctx(disk->group_ch));

	if (disk->image) {
		bdev_rbd_exit(disk->image);
	}

	if (disk->pfd >= 0) {
		close(disk->pfd);
	}

	disk->main_td = NULL;
	disk->group_ch = NULL;
}
|
|
|
|
/* Trampoline run via spdk_call_unaffinitized(): open the long-lived image
 * handle off the SPDK thread. Returns arg on success, NULL on failure.
 */
static void *
bdev_rbd_handle(void *arg)
{
	struct bdev_rbd *disk = arg;
	void *ret = arg;

	if (rbd_open(disk->io_ctx, disk->rbd_name, &disk->image, NULL) < 0) {
		SPDK_ERRLOG("Failed to open specified rbd device\n");
		ret = NULL;
	}

	return ret;
}
|
|
|
|
/* First-channel setup for a disk: join the module poll group, open the
 * image, create the eventfd, wire it to rbd image notifications, and add it
 * to the group's epoll set. On any failure all partially-created resources
 * are released via bdev_rbd_free_channel_resources(). Returns 0 or -1.
 */
static int
_bdev_rbd_create_cb(struct bdev_rbd *disk)
{
	int ret;
	struct epoll_event event = {};

	disk->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&rbd_if));
	assert(disk->group_ch != NULL);
	event.events = EPOLLIN;
	event.data.ptr = disk;

	/* Open the image off the SPDK thread (see bdev_rbd_handle). */
	if (spdk_call_unaffinitized(bdev_rbd_handle, disk) == NULL) {
		goto err;
	}

	disk->pfd = eventfd(0, EFD_NONBLOCK);
	if (disk->pfd < 0) {
		SPDK_ERRLOG("Failed to get eventfd\n");
		goto err;
	}

	ret = rbd_set_image_notification(disk->image, disk->pfd, EVENT_TYPE_EVENTFD);
	if (ret < 0) {
		SPDK_ERRLOG("Failed to set rbd image notification\n");
		goto err;
	}

	ret = epoll_ctl(disk->group_ch->epoll_fd, EPOLL_CTL_ADD, disk->pfd, &event);
	if (ret < 0) {
		SPDK_ERRLOG("Failed to add the fd of disk=%p to the epoll group from group_ch=%p\n", disk,
			    disk->group_ch);
		goto err;
	}

	return 0;

err:
	bdev_rbd_free_channel_resources(disk);
	return -1;
}
|
|
|
|
/* Per-thread io_channel create callback. The first channel performs the
 * heavyweight setup and pins the current thread as the disk's main thread;
 * later channels just bump the count. Guarded by the disk mutex.
 */
static int
bdev_rbd_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_rbd_io_channel *ch = ctx_buf;
	struct bdev_rbd *disk = io_device;
	int rc;

	ch->disk = disk;
	pthread_mutex_lock(&disk->mutex);
	if (disk->ch_count == 0) {
		assert(disk->main_td == NULL);
		rc = _bdev_rbd_create_cb(disk);
		if (rc) {
			SPDK_ERRLOG("Cannot create channel for disk=%p\n", disk);
			pthread_mutex_unlock(&disk->mutex);
			return rc;
		}

		disk->main_td = spdk_get_thread();
	}

	disk->ch_count++;
	pthread_mutex_unlock(&disk->mutex);

	return 0;
}
|
|
|
|
/* Runs on the disk's main thread when the final channel was destroyed on a
 * different thread. Re-checks ch_count under the lock because a new channel
 * may have been created while the message was in flight.
 */
static void
_bdev_rbd_destroy_cb(void *ctx)
{
	struct bdev_rbd *disk = ctx;

	pthread_mutex_lock(&disk->mutex);
	assert(disk->ch_count > 0);
	disk->ch_count--;

	if (disk->ch_count > 0) {
		/* A new channel was created between when message was sent and this function executed */
		pthread_mutex_unlock(&disk->mutex);
		return;
	}

	bdev_rbd_free_channel_resources(disk);
	pthread_mutex_unlock(&disk->mutex);
}
|
|
|
|
/* Per-thread io_channel destroy callback. When the last channel goes away
 * the shared resources must be freed on the disk's main thread; if we are
 * on another thread, the count is temporarily re-incremented and a message
 * is sent so _bdev_rbd_destroy_cb can do the final teardown there.
 */
static void
bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_rbd *disk = io_device;
	struct spdk_thread *thread;

	pthread_mutex_lock(&disk->mutex);
	assert(disk->ch_count > 0);
	disk->ch_count--;
	if (disk->ch_count == 0) {
		assert(disk->main_td != NULL);
		if (disk->main_td != spdk_get_thread()) {
			/* The final channel was destroyed on a different thread
			 * than where the first channel was created. Pass a message
			 * to the main thread to unregister the poller. */
			disk->ch_count++;
			thread = disk->main_td;
			pthread_mutex_unlock(&disk->mutex);
			spdk_thread_send_msg(thread, _bdev_rbd_destroy_cb, disk);
			return;
		}

		bdev_rbd_free_channel_resources(disk);
	}
	pthread_mutex_unlock(&disk->mutex);
}
|
|
|
|
/* bdev layer callback: get an io_channel for this disk (the disk struct
 * itself is the registered io_device).
 */
static struct spdk_io_channel *
bdev_rbd_get_io_channel(void *ctx)
{
	struct bdev_rbd *rbd_bdev = ctx;

	return spdk_get_io_channel(rbd_bdev);
}
|
|
|
|
/* Write the named shared cluster's user_id and config (params or file) into
 * an already-open JSON object. Silently writes nothing if the name is not
 * found in the registry.
 */
static void
bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd_cluster *entry;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(cluster_name, entry->name)) {
			continue;
		}
		if (entry->user_id) {
			spdk_json_write_named_string(w, "user_id", entry->user_id);
		}

		if (entry->config_param) {
			char **config_entry = entry->config_param;

			spdk_json_write_named_object_begin(w, "config_param");
			while (*config_entry) {
				spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
				config_entry += 2;
			}
			spdk_json_write_object_end(w);
		} else if (entry->config_file) {
			spdk_json_write_named_string(w, "config_file", entry->config_file);
		}

		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return;
	}

	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
}
|
|
|
|
/* bdev layer dump_info_json callback: emit this bdev's rbd-specific details.
 * When the bdev uses a shared cluster, the cluster's own config is dumped
 * instead of the per-bdev user_id/config.
 */
static int
bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd *rbd_bdev = ctx;

	spdk_json_write_named_object_begin(w, "rbd");

	spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name);

	spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);

	if (rbd_bdev->cluster_name) {
		bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w);
		goto end;
	}

	if (rbd_bdev->user_id) {
		spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
	}

	if (rbd_bdev->config) {
		char **entry = rbd_bdev->config;

		spdk_json_write_named_object_begin(w, "config");
		while (*entry) {
			spdk_json_write_named_string(w, entry[0], entry[1]);
			entry += 2;
		}
		spdk_json_write_object_end(w);
	}

end:
	spdk_json_write_object_end(w);

	return 0;
}
|
|
|
|
/* bdev layer write_config_json callback: emit the bdev_rbd_create RPC call
 * that would recreate this bdev on config save/load.
 */
static void
bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd *rbd = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_rbd_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
	spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	if (rbd->user_id) {
		spdk_json_write_named_string(w, "user_id", rbd->user_id);
	}

	if (rbd->config) {
		char **entry = rbd->config;

		spdk_json_write_named_object_begin(w, "config");
		while (*entry) {
			spdk_json_write_named_string(w, entry[0], entry[1]);
			entry += 2;
		}
		spdk_json_write_object_end(w);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
|
|
|
|
/* Emit one registered cluster as a JSON object: name, optional user_id, and
 * either config_param or config_file. Caller holds the registry mutex.
 */
static void
dump_single_cluster_entry(struct bdev_rbd_cluster *entry, struct spdk_json_write_ctx *w)
{
	assert(entry != NULL);

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "cluster_name", entry->name);

	if (entry->user_id) {
		spdk_json_write_named_string(w, "user_id", entry->user_id);
	}

	if (entry->config_param) {
		char **config_entry = entry->config_param;

		spdk_json_write_named_object_begin(w, "config_param");
		while (*config_entry) {
			spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
			config_entry += 2;
		}
		spdk_json_write_object_end(w);
	} else if (entry->config_file) {
		spdk_json_write_named_string(w, "config_file", entry->config_file);
	}

	spdk_json_write_object_end(w);
}
|
|
|
|
/* RPC backend: dump registered cluster info. With a name, respond with that
 * single cluster (or -ENOENT); with name == NULL, respond with a JSON array
 * of all registered clusters. Returns 0 once a response has been sent.
 */
int
bdev_rbd_get_clusters_info(struct spdk_jsonrpc_request *request, const char *name)
{
	struct bdev_rbd_cluster *entry;
	struct spdk_json_write_ctx *w;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);

	if (STAILQ_EMPTY(&g_map_bdev_rbd_cluster)) {
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -ENOENT;
	}

	/* If cluster name is provided */
	if (name) {
		STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
			if (strcmp(name, entry->name) == 0) {
				w = spdk_jsonrpc_begin_result(request);
				dump_single_cluster_entry(entry, w);
				spdk_jsonrpc_end_result(request, w);

				pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
				return 0;
			}
		}

		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -ENOENT;
	}

	w = spdk_jsonrpc_begin_result(request);
	spdk_json_write_array_begin(w);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		dump_single_cluster_entry(entry, w);
	}
	spdk_json_write_array_end(w);
	spdk_jsonrpc_end_result(request, w);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return 0;
}
|
|
|
|
/* Function table the bdev layer uses to drive this module's bdevs. */
static const struct spdk_bdev_fn_table rbd_fn_table = {
	.destruct = bdev_rbd_destruct,
	.submit_request = bdev_rbd_submit_request,
	.io_type_supported = bdev_rbd_io_type_supported,
	.get_io_channel = bdev_rbd_get_io_channel,
	.dump_info_json = bdev_rbd_dump_info_json,
	.write_config_json = bdev_rbd_write_config_json,
};
|
|
|
|
/* Create, configure, connect and register a named shared rados cluster.
 *
 * Name must be unique. config_param (flat key/value array) takes priority
 * over config_file. The registry mutex is held for the whole operation,
 * including the (potentially slow) rados_connect(). Returns 0 on success,
 * -1 on any failure, with the partially-built entry freed.
 */
static int
rbd_register_cluster(const char *name, const char *user_id, const char *const *config_param,
		     const char *config_file)
{
	struct bdev_rbd_cluster *entry;
	int rc;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(name, entry->name) == 0) {
			SPDK_ERRLOG("Cluster name=%s already exists\n", name);
			pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
			return -1;
		}
	}

	entry = calloc(1, sizeof(*entry));
	if (!entry) {
		SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -1;
	}

	entry->name = strdup(name);
	if (entry->name == NULL) {
		SPDK_ERRLOG("Failed to save the name =%s on entry =%p\n", name, entry);
		goto err_handle;
	}

	if (user_id) {
		entry->user_id = strdup(user_id);
		if (entry->user_id == NULL) {
			SPDK_ERRLOG("Failed to save the str =%s on entry =%p\n", user_id, entry);
			goto err_handle;
		}
	}

	/* The first priority is the config_param, then we use the config_file */
	if (config_param) {
		entry->config_param = bdev_rbd_dup_config(config_param);
		if (entry->config_param == NULL) {
			SPDK_ERRLOG("Failed to save the config_param=%p on entry = %p\n", config_param, entry);
			goto err_handle;
		}
	} else if (config_file) {
		entry->config_file = strdup(config_file);
		if (entry->config_file == NULL) {
			SPDK_ERRLOG("Failed to save the config_file=%s on entry = %p\n", config_file, entry);
			goto err_handle;
		}
	}

	rc = rados_create(&entry->cluster, user_id);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to create rados_t struct\n");
		goto err_handle;
	}

	if (config_param) {
		/* Apply key/value overrides pairwise. */
		const char *const *config_entry = config_param;
		while (*config_entry) {
			rc = rados_conf_set(entry->cluster, config_entry[0], config_entry[1]);
			if (rc < 0) {
				SPDK_ERRLOG("Failed to set %s = %s\n", config_entry[0], config_entry[1]);
				rados_shutdown(entry->cluster);
				goto err_handle;
			}
			config_entry += 2;
		}
	} else {
		/* NULL config_file falls back to the default conf search path. */
		rc = rados_conf_read_file(entry->cluster, entry->config_file);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to read conf file\n");
			rados_shutdown(entry->cluster);
			goto err_handle;
		}
	}

	rc = rados_connect(entry->cluster);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to connect to rbd_pool on cluster=%p\n", entry->cluster);
		rados_shutdown(entry->cluster);
		goto err_handle;
	}

	STAILQ_INSERT_TAIL(&g_map_bdev_rbd_cluster, entry, link);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return 0;

err_handle:
	bdev_rbd_cluster_free(entry);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
	return -1;
}
|
|
|
|
int
|
|
bdev_rbd_unregister_cluster(const char *name)
|
|
{
|
|
struct bdev_rbd_cluster *entry;
|
|
int rc = 0;
|
|
|
|
if (name == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
|
|
STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
|
|
if (strcmp(name, entry->name) == 0) {
|
|
if (entry->ref == 0) {
|
|
STAILQ_REMOVE(&g_map_bdev_rbd_cluster, entry, bdev_rbd_cluster, link);
|
|
rados_shutdown(entry->cluster);
|
|
bdev_rbd_cluster_free(entry);
|
|
} else {
|
|
SPDK_ERRLOG("Cluster with name=%p is still used and we cannot delete it\n",
|
|
entry->name);
|
|
rc = -1;
|
|
}
|
|
|
|
pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
|
|
return rc;
|
|
}
|
|
}
|
|
|
|
pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
|
|
|
|
SPDK_ERRLOG("Could not find the cluster name =%p\n", name);
|
|
|
|
return -1;
|
|
}
|
|
|
|
static void *
|
|
_bdev_rbd_register_cluster(void *arg)
|
|
{
|
|
struct cluster_register_info *info = arg;
|
|
void *ret = arg;
|
|
int rc;
|
|
|
|
rc = rbd_register_cluster((const char *)info->name, (const char *)info->user_id,
|
|
(const char *const *)info->config_param, (const char *)info->config_file);
|
|
if (rc) {
|
|
ret = NULL;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int
|
|
bdev_rbd_register_cluster(struct cluster_register_info *info)
|
|
{
|
|
assert(info != NULL);
|
|
|
|
/* Rados cluster info need to be created in non SPDK-thread to avoid CPU
|
|
* resource contention */
|
|
if (spdk_call_unaffinitized(_bdev_rbd_register_cluster, info) == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Create and register an rbd bdev.
 *
 * On success, *bdev is set to the newly registered spdk_bdev and 0 is
 * returned. On failure a negative errno is returned and all partially
 * initialized state is released via bdev_rbd_free().
 *
 * name         - bdev name; if NULL an automatic "Ceph<N>" name is generated.
 * user_id      - optional ceph user id (may be NULL).
 * pool_name    - required rados pool name.
 * config       - optional NULL-terminated key/value pair array (may be NULL).
 * rbd_name     - required rbd image name; also used as the io_device name.
 * block_size   - logical block size exposed by the bdev.
 * cluster_name - optional pre-registered cluster name (may be NULL).
 */
int
bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
		const char *pool_name,
		const char *const *config,
		const char *rbd_name,
		uint32_t block_size,
		const char *cluster_name)
{
	struct bdev_rbd *rbd;
	int ret;

	if ((pool_name == NULL) || (rbd_name == NULL)) {
		return -EINVAL;
	}

	rbd = calloc(1, sizeof(struct bdev_rbd));
	if (rbd == NULL) {
		SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
		return -ENOMEM;
	}

	ret = pthread_mutex_init(&rbd->mutex, NULL);
	if (ret) {
		SPDK_ERRLOG("Cannot init mutex on rbd=%p\n", rbd->disk.name);
		/* Mutex never initialized, so plain free() rather than
		 * bdev_rbd_free(). */
		free(rbd);
		return ret;
	}

	/* -1 marks the event notification fd as not yet created. */
	rbd->pfd = -1;
	rbd->rbd_name = strdup(rbd_name);
	if (!rbd->rbd_name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	if (user_id) {
		rbd->user_id = strdup(user_id);
		if (!rbd->user_id) {
			bdev_rbd_free(rbd);
			return -ENOMEM;
		}
	}

	if (cluster_name) {
		rbd->cluster_name = strdup(cluster_name);
		if (!rbd->cluster_name) {
			bdev_rbd_free(rbd);
			return -ENOMEM;
		}
	}
	rbd->pool_name = strdup(pool_name);
	if (!rbd->pool_name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	/* Deep-copy the caller's config pairs so they outlive this call. */
	if (config && !(rbd->config = bdev_rbd_dup_config(config))) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	/* Connects to the cluster and stats the image (fills rbd->info). */
	ret = bdev_rbd_init(rbd);
	if (ret < 0) {
		bdev_rbd_free(rbd);
		SPDK_ERRLOG("Failed to init rbd device\n");
		return ret;
	}

	if (name) {
		rbd->disk.name = strdup(name);
	} else {
		/* Auto-generate a unique name from the global counter. */
		rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
	}
	if (!rbd->disk.name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}
	rbd->disk.product_name = "Ceph Rbd Disk";
	bdev_rbd_count++;

	rbd->disk.write_cache = 0;
	rbd->disk.blocklen = block_size;
	rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
	rbd->disk.ctxt = rbd;
	rbd->disk.fn_table = &rbd_fn_table;
	rbd->disk.module = &rbd_if;

	SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);

	/* The io_device must be registered before the bdev so channels can be
	 * created as soon as the bdev is visible. */
	spdk_io_device_register(rbd, bdev_rbd_create_cb,
				bdev_rbd_destroy_cb,
				sizeof(struct bdev_rbd_io_channel),
				rbd_name);
	ret = spdk_bdev_register(&rbd->disk);
	if (ret) {
		/* Undo the io_device registration before freeing. */
		spdk_io_device_unregister(rbd, NULL);
		bdev_rbd_free(rbd);
		return ret;
	}

	*bdev = &(rbd->disk);

	return ret;
}
|
|
|
|
void
|
|
bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, void *cb_arg)
|
|
{
|
|
if (!bdev || bdev->module != &rbd_if) {
|
|
cb_fn(cb_arg, -ENODEV);
|
|
return;
|
|
}
|
|
|
|
spdk_bdev_unregister(bdev, cb_fn, cb_arg);
|
|
}
|
|
|
|
int
|
|
bdev_rbd_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_mb)
|
|
{
|
|
struct spdk_io_channel *ch;
|
|
struct bdev_rbd_io_channel *rbd_io_ch;
|
|
int rc;
|
|
uint64_t new_size_in_byte;
|
|
uint64_t current_size_in_mb;
|
|
|
|
if (bdev->module != &rbd_if) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
|
|
if (current_size_in_mb > new_size_in_mb) {
|
|
SPDK_ERRLOG("The new bdev size must be lager than current bdev size.\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
ch = bdev_rbd_get_io_channel(bdev);
|
|
rbd_io_ch = spdk_io_channel_get_ctx(ch);
|
|
new_size_in_byte = new_size_in_mb * 1024 * 1024;
|
|
|
|
rc = rbd_resize(rbd_io_ch->disk->image, new_size_in_byte);
|
|
spdk_put_io_channel(ch);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("failed to resize the ceph bdev.\n");
|
|
return rc;
|
|
}
|
|
|
|
rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("failed to notify block cnt change.\n");
|
|
return rc;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
bdev_rbd_group_poll(void *arg)
|
|
{
|
|
struct bdev_rbd_group_channel *group_ch = arg;
|
|
struct epoll_event events[MAX_EVENTS_PER_POLL];
|
|
int num_events, i;
|
|
|
|
num_events = epoll_wait(group_ch->epoll_fd, events, MAX_EVENTS_PER_POLL, 0);
|
|
|
|
if (num_events <= 0) {
|
|
return SPDK_POLLER_IDLE;
|
|
}
|
|
|
|
for (i = 0; i < num_events; i++) {
|
|
bdev_rbd_io_poll((struct bdev_rbd *)events[i].data.ptr);
|
|
}
|
|
|
|
return SPDK_POLLER_BUSY;
|
|
}
|
|
|
|
static int
|
|
bdev_rbd_group_create_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_rbd_group_channel *ch = ctx_buf;
|
|
|
|
ch->epoll_fd = epoll_create1(0);
|
|
if (ch->epoll_fd < 0) {
|
|
SPDK_ERRLOG("Could not create epoll fd on io device=%p\n", io_device);
|
|
return -1;
|
|
}
|
|
|
|
ch->poller = SPDK_POLLER_REGISTER(bdev_rbd_group_poll, ch, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bdev_rbd_group_destroy_cb(void *io_device, void *ctx_buf)
|
|
{
|
|
struct bdev_rbd_group_channel *ch = ctx_buf;
|
|
|
|
if (ch->epoll_fd >= 0) {
|
|
close(ch->epoll_fd);
|
|
}
|
|
|
|
spdk_poller_unregister(&ch->poller);
|
|
}
|
|
|
|
static int
|
|
bdev_rbd_library_init(void)
|
|
{
|
|
spdk_io_device_register(&rbd_if, bdev_rbd_group_create_cb, bdev_rbd_group_destroy_cb,
|
|
sizeof(struct bdev_rbd_group_channel), "bdev_rbd_poll_groups");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bdev_rbd_library_fini(void)
|
|
{
|
|
spdk_io_device_unregister(&rbd_if, NULL);
|
|
}
|
|
|
|
SPDK_LOG_REGISTER_COMPONENT(bdev_rbd)
|