Spdk/module/bdev/rbd/bdev_rbd.c
Jim Harris 488570ebd4 Replace most BSD 3-clause license text with SPDX identifier.
Many open source projects have moved to using SPDX identifiers
to specify license information, reducing the amount of
boilerplate code in every source file.  This patch replaces
the bulk of SPDK .c, .cpp and Makefiles with the BSD-3-Clause
identifier.

Almost all of these files share the exact same license text,
and this patch only modifies the files that contain the
most common license text.  There can be slight variations
because the third clause contains company names - most say
"Intel Corporation", but there are instances for Nvidia,
Samsung, Eideticom and even "the copyright holder".

Used a bash script to automate replacement of the license text
with SPDX identifier which is checked into scripts/spdx.sh.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Iaa88ab5e92ea471691dc298cfe41ebfb5d169780
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12904
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Dong Yi <dongx.yi@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: <qun.wan@intel.com>
2022-06-09 07:35:12 +00:00

1344 lines
31 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation.
* All rights reserved.
*/
#include "spdk/stdinc.h"
#include "bdev_rbd.h"
#include <rbd/librbd.h>
#include <rados/librados.h>
#include "spdk/env.h"
#include "spdk/bdev.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/bdev_module.h"
#include "spdk/log.h"
static int bdev_rbd_count = 0;
/* Per-bdev state for one exported RBD image. */
struct bdev_rbd {
	struct spdk_bdev disk;		/* SPDK bdev exposed to upper layers */
	char *rbd_name;			/* image name within the pool */
	char *user_id;			/* Ceph user id; NULL selects the default */
	char *pool_name;		/* RADOS pool containing the image */
	char **config;			/* NULL-terminated flat key/value pairs */
	rados_t cluster;		/* privately-owned cluster (no cluster_name case) */
	rados_t *cluster_p;		/* points at either &cluster or a shared entry */
	char *cluster_name;		/* name of a registered shared cluster, or NULL */
	rados_ioctx_t io_ctx;
	rbd_image_t image;
	rbd_image_info_t info;		/* cached rbd_stat() result, used for sizing */
	pthread_mutex_t mutex;		/* guards ch_count/main_td transitions */
	struct spdk_thread *main_td;	/* thread that opened the image; aio runs here */
	struct spdk_thread *destruct_td; /* thread that started destruct (final callback target) */
	uint32_t ch_count;		/* number of live io channels */
	struct spdk_io_channel *group_ch; /* channel on the module-wide io device */
	TAILQ_ENTRY(bdev_rbd) tailq;
	struct spdk_poller *reset_timer; /* one-shot poller completing a RESET */
	struct spdk_bdev_io *reset_bdev_io; /* RESET IO waiting on reset_timer */
};
/* Per-channel context; only back-references the owning disk. */
struct bdev_rbd_io_channel {
	struct bdev_rbd *disk;
};
/* Per-IO driver context carried inside each spdk_bdev_io. */
struct bdev_rbd_io {
	struct spdk_thread *submit_td;	/* thread the IO was submitted on; completion returns there */
	enum spdk_bdev_io_status status; /* status captured for deferred completion */
	rbd_completion_t comp;		/* librbd aio completion handle */
	size_t total_len;		/* expected byte count, checked for reads */
};
/* A named, shared Rados cluster connection that multiple bdevs can reference. */
struct bdev_rbd_cluster {
	char *name;		/* registry key */
	char *user_id;
	char **config_param;	/* NULL-terminated flat key/value pairs */
	char *config_file;
	char *key_file;
	rados_t cluster;	/* live connection; bdevs hold &cluster via cluster_p */
	uint32_t ref;		/* number of bdevs using this entry */
	STAILQ_ENTRY(bdev_rbd_cluster) link;
};
/* Registry of named shared clusters; every traversal/mutation holds the mutex below. */
static STAILQ_HEAD(, bdev_rbd_cluster) g_map_bdev_rbd_cluster = STAILQ_HEAD_INITIALIZER(
			g_map_bdev_rbd_cluster);
static pthread_mutex_t g_map_bdev_rbd_cluster_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Release every allocation owned by a cluster registry entry, then the entry
 * itself. Does not touch entry->cluster; callers shut the connection down
 * first when one exists.
 */
static void
bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry)
{
	assert(entry != NULL);

	free(entry->name);
	free(entry->user_id);
	free(entry->config_file);
	free(entry->key_file);
	bdev_rbd_free_config(entry->config_param);
	free(entry);
}
/* Drop one reference on a shared cluster and clear the caller's handle.
 * *cluster must be the address of the "cluster" member of a registered
 * entry (as handed out by bdev_rbd_get_cluster); logs an error otherwise.
 */
static void
bdev_rbd_put_cluster(rados_t **cluster)
{
	struct bdev_rbd_cluster *entry;

	assert(cluster != NULL);

	/* No need to search the registry if *cluster is already NULL. */
	if (*cluster == NULL) {
		return;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		/* Match by address, not by name: callers hold a pointer into the entry. */
		if (*cluster != &entry->cluster) {
			continue;
		}

		assert(entry->ref > 0);
		entry->ref--;
		*cluster = NULL;
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return;
	}
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster);
}
/* Tear down a bdev_rbd in dependency order: io_ctx before the cluster,
 * and either release the shared-cluster reference or shut down the
 * privately-owned cluster, whichever this bdev holds.
 */
static void
bdev_rbd_free(struct bdev_rbd *rbd)
{
	if (!rbd) {
		return;
	}

	free(rbd->disk.name);
	free(rbd->rbd_name);
	free(rbd->user_id);
	free(rbd->pool_name);
	bdev_rbd_free_config(rbd->config);

	/* ioctx must be destroyed before the underlying cluster goes away. */
	if (rbd->io_ctx) {
		rados_ioctx_destroy(rbd->io_ctx);
	}

	if (rbd->cluster_name) {
		/* Shared cluster: just drop our reference. */
		bdev_rbd_put_cluster(&rbd->cluster_p);
		free(rbd->cluster_name);
	} else if (rbd->cluster) {
		/* Private cluster: we own the connection. */
		rados_shutdown(rbd->cluster);
	}

	pthread_mutex_destroy(&rbd->mutex);
	free(rbd);
}
/* Free a NULL-terminated array of strings (as built by bdev_rbd_dup_config)
 * and the array itself. A NULL config is a no-op.
 */
void
bdev_rbd_free_config(char **config)
{
	char **p;

	if (config == NULL) {
		return;
	}

	for (p = config; *p != NULL; p++) {
		free(*p);
	}
	free(config);
}
/* Deep-copy a NULL-terminated string array. Returns a newly allocated copy
 * that the caller frees with bdev_rbd_free_config(), or NULL if config is
 * NULL or any allocation fails (partial copies are released).
 */
char **
bdev_rbd_dup_config(const char *const *config)
{
	size_t n = 0;
	size_t i;
	char **dup;

	if (config == NULL) {
		return NULL;
	}

	while (config[n] != NULL) {
		n++;
	}

	/* n + 1 keeps the terminating NULL slot (calloc zeroes it). */
	dup = calloc(n + 1, sizeof(*dup));
	if (dup == NULL) {
		return NULL;
	}

	for (i = 0; i < n; i++) {
		dup[i] = strdup(config[i]);
		if (dup[i] == NULL) {
			bdev_rbd_free_config(dup);
			return NULL;
		}
	}

	return dup;
}
/* Create and connect a private rados_t. Configuration comes either from the
 * flat key/value list "config" ({k0, v0, k1, v1, ..., NULL}) or, when that is
 * NULL, from the default Ceph conf file. Returns 0 on success, -1 on any
 * failure (the partially created cluster is shut down).
 */
static int
bdev_rados_cluster_init(const char *user_id, const char *const *config,
			rados_t *cluster)
{
	int rc;

	rc = rados_create(cluster, user_id);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to create rados_t struct\n");
		return -1;
	}

	if (config) {
		const char *const *kv;

		for (kv = config; *kv; kv += 2) {
			rc = rados_conf_set(*cluster, kv[0], kv[1]);
			if (rc < 0) {
				SPDK_ERRLOG("Failed to set %s = %s\n", kv[0], kv[1]);
				goto err;
			}
		}
	} else {
		rc = rados_conf_read_file(*cluster, NULL);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to read conf file\n");
			goto err;
		}
	}

	rc = rados_connect(*cluster);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to connect to rbd_pool\n");
		goto err;
	}

	return 0;

err:
	rados_shutdown(*cluster);
	return -1;
}
/* Look up a registered shared cluster by name and take a reference.
 * On success *cluster points into the registry entry (its "cluster" member)
 * and 0 is returned; -1 if the name is unknown or cluster is NULL.
 */
static int
bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster)
{
	struct bdev_rbd_cluster *entry;

	if (cluster == NULL) {
		SPDK_ERRLOG("cluster should not be NULL\n");
		return -1;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(cluster_name, entry->name) == 0) {
			entry->ref++;
			*cluster = &entry->cluster;
			pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
			return 0;
		}
	}
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return -1;
}
/* Thin wrapper around bdev_rbd_get_cluster() that adds an error log.
 * Returns 0 on success, -1 when the named cluster is not registered.
 */
static int
bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster)
{
	if (bdev_rbd_get_cluster(cluster_name, cluster) < 0) {
		SPDK_ERRLOG("Failed to create rados_t struct\n");
		return -1;
	}

	return 0;
}
/* Trampoline for spdk_call_unaffinitized(): create this bdev's private
 * cluster connection. Returns arg on success, NULL on failure.
 */
static void *
bdev_rbd_cluster_handle(void *arg)
{
	struct bdev_rbd *rbd = arg;

	if (bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config,
				    &rbd->cluster) < 0) {
		SPDK_ERRLOG("Failed to create rados cluster for user_id=%s and rbd_pool=%s\n",
			    rbd->user_id ? rbd->user_id : "admin (the default)", rbd->pool_name);
		return NULL;
	}

	return arg;
}
/* Trampoline for spdk_call_unaffinitized(): create the pool ioctx and probe
 * the image. The image is opened only long enough to rbd_stat() it (cached in
 * rbd->info for sizing) and is closed again here; it is reopened per
 * channel-group in bdev_rbd_handle(). Returns arg on success, NULL on failure.
 */
static void *
bdev_rbd_init_context(void *arg)
{
	struct bdev_rbd *rbd = arg;
	int rc;

	if (rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &rbd->io_ctx) < 0) {
		SPDK_ERRLOG("Failed to create ioctx on rbd=%p\n", rbd);
		return NULL;
	}

	rc = rbd_open(rbd->io_ctx, rbd->rbd_name, &rbd->image, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to open specified rbd device\n");
		return NULL;
	}

	rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
	rbd_close(rbd->image);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to stat specified rbd device\n");
		return NULL;
	}

	return arg;
}
/* Establish this bdev's cluster connection (private or shared) and initialize
 * its ioctx/image info. Returns 0 on success, negative on failure.
 *
 * Fix: a failure of bdev_rbd_init_context() was previously only logged while
 * the function still returned 0, letting creation continue with an
 * uninitialized rbd->info (and hence a bogus blockcnt). Now returns -1.
 */
static int
bdev_rbd_init(struct bdev_rbd *rbd)
{
	int ret = 0;

	if (!rbd->cluster_name) {
		rbd->cluster_p = &rbd->cluster;
		/* Cluster should be created in non-SPDK thread to avoid conflict between
		 * Rados and SPDK thread */
		if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) {
			SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd);
			return -1;
		}
	} else {
		ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p);
		if (ret < 0) {
			SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n",
				    rbd, rbd->cluster_name);
			return -1;
		}
	}

	if (spdk_call_unaffinitized(bdev_rbd_init_context, rbd) == NULL) {
		SPDK_ERRLOG("Cannot init rbd context for rbd=%p\n", rbd);
		return -1;
	}

	return ret;
}
/* Flush any buffered writes, then close the image handle. */
static void
bdev_rbd_exit(rbd_image_t image)
{
	rbd_flush(image);
	rbd_close(image);
}
/* Message callback: complete the bdev_io on its submitting thread using the
 * status stashed in the driver context.
 */
static void
_bdev_rbd_io_complete(void *_rbd_io)
{
	struct bdev_rbd_io *rbd_io = _rbd_io;

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(rbd_io), rbd_io->status);
}
/* Complete a bdev_io, hopping back to the submitting thread if the completion
 * is being reported from a different one (aio completes on the disk's main
 * thread; submission may have happened elsewhere).
 */
static void
bdev_rbd_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
	struct spdk_thread *current_thread = spdk_get_thread();

	rbd_io->status = status;
	assert(rbd_io->submit_td != NULL);
	if (rbd_io->submit_td != current_thread) {
		spdk_thread_send_msg(rbd_io->submit_td, _bdev_rbd_io_complete, rbd_io);
	} else {
		_bdev_rbd_io_complete(rbd_io);
	}
}
/* librbd aio completion callback. For reads the return value is the byte
 * count, so success means it matches the requested total_len; for all other
 * ops success is 0. The completion is released before reporting status.
 */
static void
bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
{
	int io_status;
	struct spdk_bdev_io *bdev_io;
	struct bdev_rbd_io *rbd_io;
	enum spdk_bdev_io_status bio_status;

	bdev_io = rbd_aio_get_arg(cb);
	rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
	io_status = rbd_aio_get_return_value(cb);
	bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		if ((int)rbd_io->total_len != io_status) {
			bio_status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	} else {
		/* For others, 0 means success */
		if (io_status != 0) {
			bio_status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	/* Release the librbd completion before completing the bdev_io. */
	rbd_aio_release(cb);

	bdev_rbd_io_complete(bdev_io, bio_status);
}
/* Submit one bdev_io as a librbd aio operation. Runs on the disk's main
 * thread. On any submission failure the IO is completed as FAILED and the
 * completion object (if created) is released.
 */
static void
bdev_rbd_start_aio(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io,
		   struct iovec *iov, int iovcnt, uint64_t offset, size_t len)
{
	int ret;
	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
	rbd_image_t image = disk->image;

	ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
					&rbd_io->comp);
	if (ret < 0) {
		goto err;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		/* Remember the expected byte count; checked in the completion. */
		rbd_io->total_len = len;
		if (spdk_likely(iovcnt == 1)) {
			/* Single-iov fast path avoids the iovec-based variant. */
			ret = rbd_aio_read(image, offset, iov[0].iov_len, iov[0].iov_base, rbd_io->comp);
		} else {
			ret = rbd_aio_readv(image, iov, iovcnt, offset, rbd_io->comp);
		}
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		if (spdk_likely(iovcnt == 1)) {
			ret = rbd_aio_write(image, offset, iov[0].iov_len, iov[0].iov_base, rbd_io->comp);
		} else {
			ret = rbd_aio_writev(image, iov, iovcnt, offset, rbd_io->comp);
		}
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
		ret = rbd_aio_discard(image, offset, len, rbd_io->comp);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
		ret = rbd_aio_flush(image, rbd_io->comp);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES) {
		ret = rbd_aio_write_zeroes(image, offset, len, rbd_io->comp, /* zero_flags */ 0, /* op_flags */ 0);
	}

	if (ret < 0) {
		rbd_aio_release(rbd_io->comp);
		goto err;
	}

	return;

err:
	bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
static int bdev_rbd_library_init(void);
static void bdev_rbd_library_fini(void);
/* Size of the per-IO driver context SPDK must reserve in each spdk_bdev_io. */
static int
bdev_rbd_get_ctx_size(void)
{
	return sizeof(struct bdev_rbd_io);
}
/* Module descriptor and registration for the "rbd" bdev module. */
static struct spdk_bdev_module rbd_if = {
	.name = "rbd",
	.module_init = bdev_rbd_library_init,
	.module_fini = bdev_rbd_library_fini,
	.get_ctx_size = bdev_rbd_get_ctx_size,
};
SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if)
/* Poller fired once after the reset delay: complete the pending RESET and
 * unregister ourselves.
 */
static int
bdev_rbd_reset_timer(void *arg)
{
	struct bdev_rbd *disk = arg;

	/*
	 * TODO: This should check if any I/O is still in flight before completing the reset.
	 * For now, just complete after the timer expires.
	 */
	bdev_rbd_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	spdk_poller_unregister(&disk->reset_timer);
	disk->reset_bdev_io = NULL;

	return SPDK_POLLER_BUSY;
}
/* Handle a RESET by arming a one-second timer (see bdev_rbd_reset_timer).
 * Only one RESET may be outstanding at a time.
 */
static void
bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io)
{
	/*
	 * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a
	 * timer to wait for in-flight I/O to complete.
	 */
	assert(disk->reset_bdev_io == NULL);
	disk->reset_bdev_io = bdev_io;
	disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000);
}
/* Final step of destruct, run on the thread that initiated it: ack the
 * destruct to the bdev layer and free all bdev_rbd resources.
 */
static void
_bdev_rbd_destruct_done(void *io_device)
{
	struct bdev_rbd *rbd = io_device;

	assert(rbd != NULL);
	assert(rbd->ch_count == 0);

	spdk_bdev_destruct_done(&rbd->disk, 0);
	bdev_rbd_free(rbd);
}
/* io_device unregister callback. */
static void
bdev_rbd_free_cb(void *io_device)
{
	struct bdev_rbd *rbd = io_device;

	/* The io device has been unregistered. Send a message back to the
	 * original thread that started the destruct operation, so that the
	 * bdev unregister callback is invoked on the same thread that started
	 * this whole process.
	 */
	spdk_thread_send_msg(rbd->destruct_td, _bdev_rbd_destruct_done, rbd);
}
/* Runs on the bdev's main thread: kick off io_device unregistration. */
static void
_bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *rbd = ctx;

	spdk_io_device_unregister(rbd, bdev_rbd_free_cb);
}
/* fn_table destruct entry point. Hands the work to the bdev's main thread
 * (or the current one if no channel was ever created) and completes
 * asynchronously via spdk_bdev_destruct_done().
 */
static int
bdev_rbd_destruct(void *ctx)
{
	struct bdev_rbd *rbd = ctx;
	struct spdk_thread *td;

	if (rbd->main_td == NULL) {
		td = spdk_get_thread();
	} else {
		td = rbd->main_td;
	}

	/* Start the destruct operation on the rbd bdev's
	 * main thread. This guarantees it will only start
	 * executing after any messages related to channel
	 * deletions have finished completing. *Always*
	 * send a message, even if this function gets called
	 * from the main thread, in case there are pending
	 * channel delete messages in flight to this thread.
	 */
	assert(rbd->destruct_td == NULL);
	rbd->destruct_td = td;
	spdk_thread_send_msg(td, _bdev_rbd_destruct, rbd);

	/* Return 1 to indicate the destruct path is asynchronous. */
	return 1;
}
/* Buffer-allocation callback for reads: once a data buffer is available,
 * submit the aio; fail the IO if buffer allocation did not succeed.
 */
static void
bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	if (!success) {
		bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	bdev_rbd_start_aio(disk,
			   bdev_io,
			   bdev_io->u.bdev.iovs,
			   bdev_io->u.bdev.iovcnt,
			   bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
			   bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}
/* Dispatch one bdev_io on the disk's main thread. Reads first acquire a
 * buffer via spdk_bdev_io_get_buf(); other supported types go straight to
 * bdev_rbd_start_aio(); RESET is handled by the timer hack.
 */
static void
_bdev_rbd_submit_request(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		bdev_rbd_start_aio(disk,
				   bdev_io,
				   bdev_io->u.bdev.iovs,
				   bdev_io->u.bdev.iovcnt,
				   bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
				   bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt,
			       bdev_io);
		break;

	default:
		SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
		bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
/* fn_table submit entry point: record the submitting thread for completion
 * routing, then forward to the disk's main thread if we are not already on it.
 */
static void
bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
	struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;

	rbd_io->submit_td = submit_td;
	if (disk->main_td != submit_td) {
		spdk_thread_send_msg(disk->main_td, _bdev_rbd_submit_request, bdev_io);
	} else {
		_bdev_rbd_submit_request(bdev_io);
	}
}
/* fn_table capability query: report which IO types this module handles. */
static bool
bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	return io_type == SPDK_BDEV_IO_TYPE_READ ||
	       io_type == SPDK_BDEV_IO_TYPE_WRITE ||
	       io_type == SPDK_BDEV_IO_TYPE_UNMAP ||
	       io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
	       io_type == SPDK_BDEV_IO_TYPE_RESET ||
	       io_type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
}
/* Release everything acquired when the first channel was created: the group
 * channel reference and the open image. Must run on the main thread, with no
 * remaining channels.
 */
static void
bdev_rbd_free_channel_resources(struct bdev_rbd *disk)
{
	assert(disk != NULL);
	assert(disk->main_td == spdk_get_thread());
	assert(disk->ch_count == 0);

	spdk_put_io_channel(disk->group_ch);
	if (disk->image) {
		bdev_rbd_exit(disk->image);
	}

	disk->main_td = NULL;
	disk->group_ch = NULL;
}
/* Trampoline for spdk_call_unaffinitized(): open the RBD image for IO.
 * Returns arg on success, NULL if rbd_open() fails.
 */
static void *
bdev_rbd_handle(void *arg)
{
	struct bdev_rbd *disk = arg;

	if (rbd_open(disk->io_ctx, disk->rbd_name, &disk->image, NULL) < 0) {
		SPDK_ERRLOG("Failed to open specified rbd device\n");
		return NULL;
	}

	return disk;
}
/* First-channel setup: take a reference on the module's group io device and
 * open the image off the SPDK threads. Returns 0 on success, -1 on failure
 * (resources released).
 */
static int
_bdev_rbd_create_cb(struct bdev_rbd *disk)
{
	disk->group_ch = spdk_get_io_channel(&rbd_if);
	assert(disk->group_ch != NULL);

	if (spdk_call_unaffinitized(bdev_rbd_handle, disk) == NULL) {
		bdev_rbd_free_channel_resources(disk);
		return -1;
	}

	return 0;
}
/* io_device channel-create callback. The first channel performs the shared
 * setup (_bdev_rbd_create_cb) and claims the current thread as main_td;
 * subsequent channels only bump ch_count. All under disk->mutex.
 */
static int
bdev_rbd_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_rbd_io_channel *ch = ctx_buf;
	struct bdev_rbd *disk = io_device;
	int rc;

	ch->disk = disk;
	pthread_mutex_lock(&disk->mutex);
	if (disk->ch_count == 0) {
		assert(disk->main_td == NULL);
		rc = _bdev_rbd_create_cb(disk);
		if (rc) {
			SPDK_ERRLOG("Cannot create channel for disk=%p\n", disk);
			pthread_mutex_unlock(&disk->mutex);
			return rc;
		}

		disk->main_td = spdk_get_thread();
	}

	disk->ch_count++;
	pthread_mutex_unlock(&disk->mutex);

	return 0;
}
/* Deferred channel teardown, run on the main thread after a cross-thread
 * destroy. Re-checks ch_count because a new channel may have appeared while
 * the message was in flight.
 */
static void
_bdev_rbd_destroy_cb(void *ctx)
{
	struct bdev_rbd *disk = ctx;

	pthread_mutex_lock(&disk->mutex);
	assert(disk->ch_count > 0);
	disk->ch_count--;

	if (disk->ch_count > 0) {
		/* A new channel was created between when message was sent and this function executed */
		pthread_mutex_unlock(&disk->mutex);
		return;
	}

	bdev_rbd_free_channel_resources(disk);
	pthread_mutex_unlock(&disk->mutex);
}
/* io_device channel-destroy callback. When the last channel goes away the
 * shared resources are freed — on the main thread, forwarding there (with the
 * count temporarily restored) if the destroy happened elsewhere.
 */
static void
bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_rbd *disk = io_device;
	struct spdk_thread *thread;

	pthread_mutex_lock(&disk->mutex);
	assert(disk->ch_count > 0);
	disk->ch_count--;
	if (disk->ch_count == 0) {
		assert(disk->main_td != NULL);
		if (disk->main_td != spdk_get_thread()) {
			/* The final channel was destroyed on a different thread
			 * than where the first channel was created. Pass a message
			 * to the main thread to unregister the poller. */
			disk->ch_count++;
			thread = disk->main_td;
			pthread_mutex_unlock(&disk->mutex);
			spdk_thread_send_msg(thread, _bdev_rbd_destroy_cb, disk);
			return;
		}

		bdev_rbd_free_channel_resources(disk);
	}
	pthread_mutex_unlock(&disk->mutex);
}
/* fn_table get_io_channel: channels are keyed on the bdev_rbd io_device. */
static struct spdk_io_channel *
bdev_rbd_get_io_channel(void *ctx)
{
	struct bdev_rbd *rbd_bdev = ctx;

	return spdk_get_io_channel(rbd_bdev);
}
/* Write the named shared cluster's fields into an already-open JSON object.
 * Silently writes nothing if the name is not registered. Caller owns the
 * surrounding object begin/end.
 */
static void
bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd_cluster *entry;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(cluster_name, entry->name)) {
			continue;
		}
		if (entry->user_id) {
			spdk_json_write_named_string(w, "user_id", entry->user_id);
		}

		if (entry->config_param) {
			/* config_param is a flat key/value list, emitted pairwise. */
			char **config_entry = entry->config_param;

			spdk_json_write_named_object_begin(w, "config_param");
			while (*config_entry) {
				spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
				config_entry += 2;
			}
			spdk_json_write_object_end(w);
		}

		if (entry->config_file) {
			spdk_json_write_named_string(w, "config_file", entry->config_file);
		}

		if (entry->key_file) {
			spdk_json_write_named_string(w, "key_file", entry->key_file);
		}

		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return;
	}

	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
}
/* fn_table dump_info_json: emit this bdev's "rbd" info object. For shared
 * clusters the cluster entry's fields are inlined; otherwise the bdev's own
 * user_id/config are written.
 */
static int
bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd *rbd_bdev = ctx;

	spdk_json_write_named_object_begin(w, "rbd");

	spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name);

	spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);

	if (rbd_bdev->cluster_name) {
		bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w);
		goto end;
	}

	if (rbd_bdev->user_id) {
		spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
	}

	if (rbd_bdev->config) {
		/* config is a flat key/value list, emitted pairwise. */
		char **entry = rbd_bdev->config;

		spdk_json_write_named_object_begin(w, "config");
		while (*entry) {
			spdk_json_write_named_string(w, entry[0], entry[1]);
			entry += 2;
		}
		spdk_json_write_object_end(w);
	}

end:
	spdk_json_write_object_end(w);

	return 0;
}
/* fn_table write_config_json: emit the bdev_rbd_create RPC call that would
 * recreate this bdev on config replay.
 */
static void
bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd *rbd = bdev->ctxt;
	char uuid_str[SPDK_UUID_STRING_LEN];

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_rbd_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
	spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	if (rbd->user_id) {
		spdk_json_write_named_string(w, "user_id", rbd->user_id);
	}

	if (rbd->config) {
		/* config is a flat key/value list, emitted pairwise. */
		char **entry = rbd->config;

		spdk_json_write_named_object_begin(w, "config");
		while (*entry) {
			spdk_json_write_named_string(w, entry[0], entry[1]);
			entry += 2;
		}
		spdk_json_write_object_end(w);
	}

	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
	spdk_json_write_named_string(w, "uuid", uuid_str);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
/* Emit one cluster registry entry as a complete JSON object. Caller holds
 * g_map_bdev_rbd_cluster_mutex.
 */
static void
dump_single_cluster_entry(struct bdev_rbd_cluster *entry, struct spdk_json_write_ctx *w)
{
	assert(entry != NULL);

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "cluster_name", entry->name);

	if (entry->user_id) {
		spdk_json_write_named_string(w, "user_id", entry->user_id);
	}

	if (entry->config_param) {
		/* config_param is a flat key/value list, emitted pairwise. */
		char **config_entry = entry->config_param;

		spdk_json_write_named_object_begin(w, "config_param");
		while (*config_entry) {
			spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
			config_entry += 2;
		}
		spdk_json_write_object_end(w);
	}

	if (entry->config_file) {
		spdk_json_write_named_string(w, "config_file", entry->config_file);
	}

	if (entry->key_file) {
		spdk_json_write_named_string(w, "key_file", entry->key_file);
	}

	spdk_json_write_object_end(w);
}
/* RPC helper: reply with a single named cluster entry, or an array of all
 * registered entries when name is NULL. Returns 0 on success, -ENOENT when
 * the registry is empty or the name is unknown.
 */
int
bdev_rbd_get_clusters_info(struct spdk_jsonrpc_request *request, const char *name)
{
	struct bdev_rbd_cluster *entry;
	struct spdk_json_write_ctx *w;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);

	if (STAILQ_EMPTY(&g_map_bdev_rbd_cluster)) {
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -ENOENT;
	}

	/* If cluster name is provided */
	if (name) {
		STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
			if (strcmp(name, entry->name) == 0) {
				w = spdk_jsonrpc_begin_result(request);
				dump_single_cluster_entry(entry, w);
				spdk_jsonrpc_end_result(request, w);

				pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
				return 0;
			}
		}

		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -ENOENT;
	}

	w = spdk_jsonrpc_begin_result(request);
	spdk_json_write_array_begin(w);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		dump_single_cluster_entry(entry, w);
	}
	spdk_json_write_array_end(w);
	spdk_jsonrpc_end_result(request, w);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return 0;
}
/* bdev function table wiring this module's callbacks into the bdev layer. */
static const struct spdk_bdev_fn_table rbd_fn_table = {
	.destruct		= bdev_rbd_destruct,
	.submit_request		= bdev_rbd_submit_request,
	.io_type_supported	= bdev_rbd_io_type_supported,
	.get_io_channel		= bdev_rbd_get_io_channel,
	.dump_info_json		= bdev_rbd_dump_info_json,
	.write_config_json	= bdev_rbd_write_config_json,
};
/* Build, connect and register a named shared cluster. Rejects duplicate
 * names. On any failure the partially built entry is freed (goto cleanup)
 * and -1 is returned; 0 on success. The registry mutex is held for the whole
 * operation so concurrent registrations of the same name cannot race.
 */
static int
rbd_register_cluster(const char *name, const char *user_id, const char *const *config_param,
		     const char *config_file, const char *key_file)
{
	struct bdev_rbd_cluster *entry;
	int rc;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(name, entry->name) == 0) {
			SPDK_ERRLOG("Cluster name=%s already exists\n", name);
			pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
			return -1;
		}
	}

	entry = calloc(1, sizeof(*entry));
	if (!entry) {
		SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
		pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
		return -1;
	}

	entry->name = strdup(name);
	if (entry->name == NULL) {
		SPDK_ERRLOG("Failed to save the name =%s on entry =%p\n", name, entry);
		goto err_handle;
	}

	if (user_id) {
		entry->user_id = strdup(user_id);
		if (entry->user_id == NULL) {
			SPDK_ERRLOG("Failed to save the str =%s on entry =%p\n", user_id, entry);
			goto err_handle;
		}
	}

	/* Support specify config_param or config_file separately, or both of them. */
	if (config_param) {
		entry->config_param = bdev_rbd_dup_config(config_param);
		if (entry->config_param == NULL) {
			SPDK_ERRLOG("Failed to save the config_param=%p on entry = %p\n", config_param, entry);
			goto err_handle;
		}
	}

	if (config_file) {
		entry->config_file = strdup(config_file);
		if (entry->config_file == NULL) {
			SPDK_ERRLOG("Failed to save the config_file=%s on entry = %p\n", config_file, entry);
			goto err_handle;
		}
	}

	if (key_file) {
		entry->key_file = strdup(key_file);
		if (entry->key_file == NULL) {
			SPDK_ERRLOG("Failed to save the key_file=%s on entry = %p\n", key_file, entry);
			goto err_handle;
		}
	}

	rc = rados_create(&entry->cluster, user_id);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to create rados_t struct\n");
		goto err_handle;
	}

	/* Try default location when entry->config_file is NULL, but ignore failure when it is NULL */
	rc = rados_conf_read_file(entry->cluster, entry->config_file);
	if (entry->config_file && rc < 0) {
		SPDK_ERRLOG("Failed to read conf file %s\n", entry->config_file);
		rados_shutdown(entry->cluster);
		goto err_handle;
	}

	if (config_param) {
		/* config_param is a flat key/value list: {k0, v0, k1, v1, ..., NULL}. */
		const char *const *config_entry = config_param;

		while (*config_entry) {
			rc = rados_conf_set(entry->cluster, config_entry[0], config_entry[1]);
			if (rc < 0) {
				SPDK_ERRLOG("Failed to set %s = %s\n", config_entry[0], config_entry[1]);
				rados_shutdown(entry->cluster);
				goto err_handle;
			}
			config_entry += 2;
		}
	}

	if (key_file) {
		rc = rados_conf_set(entry->cluster, "keyring", key_file);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to set keyring = %s\n", key_file);
			rados_shutdown(entry->cluster);
			goto err_handle;
		}
	}

	rc = rados_connect(entry->cluster);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to connect to rbd_pool on cluster=%p\n", entry->cluster);
		rados_shutdown(entry->cluster);
		goto err_handle;
	}

	STAILQ_INSERT_TAIL(&g_map_bdev_rbd_cluster, entry, link);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return 0;

err_handle:
	bdev_rbd_cluster_free(entry);
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
	return -1;
}
/* Unregister a named shared cluster. Succeeds (shutting the connection down
 * and freeing the entry) only if nothing references it; returns -1 when the
 * name is NULL, unknown, or still in use.
 *
 * Fix: both error logs printed the char * name with %p (format/argument
 * mismatch — undefined behavior per C11 fprintf semantics, and it printed an
 * address instead of the name). Changed to %s.
 */
int
bdev_rbd_unregister_cluster(const char *name)
{
	struct bdev_rbd_cluster *entry;
	int rc = 0;

	if (name == NULL) {
		return -1;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		if (strcmp(name, entry->name) == 0) {
			if (entry->ref == 0) {
				STAILQ_REMOVE(&g_map_bdev_rbd_cluster, entry, bdev_rbd_cluster, link);
				rados_shutdown(entry->cluster);
				bdev_rbd_cluster_free(entry);
			} else {
				SPDK_ERRLOG("Cluster with name=%s is still used and we cannot delete it\n",
					    entry->name);
				rc = -1;
			}

			pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
			return rc;
		}
	}

	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	SPDK_ERRLOG("Could not find the cluster name =%s\n", name);

	return -1;
}
/* Trampoline for spdk_call_unaffinitized(): register a shared cluster from a
 * cluster_register_info. Returns arg on success, NULL on failure.
 */
static void *
_bdev_rbd_register_cluster(void *arg)
{
	struct cluster_register_info *info = arg;
	void *ret = arg;
	int rc;

	rc = rbd_register_cluster((const char *)info->name, (const char *)info->user_id,
				  (const char *const *)info->config_param, (const char *)info->config_file,
				  (const char *)info->key_file);
	if (rc) {
		ret = NULL;
	}

	return ret;
}
/* Public entry point: register a shared cluster, doing the Rados work off the
 * SPDK threads. Returns 0 on success, -1 on failure.
 */
int
bdev_rbd_register_cluster(struct cluster_register_info *info)
{
	assert(info != NULL);

	/* Rados cluster info need to be created in non SPDK-thread to avoid CPU
	 * resource contention */
	if (spdk_call_unaffinitized(_bdev_rbd_register_cluster, info) == NULL) {
		return -1;
	}

	return 0;
}
/* Create and register an RBD bdev.
 *
 * \param bdev Output: the registered spdk_bdev on success.
 * \param name Bdev name; when NULL a default "CephN" name is generated.
 * \param user_id Ceph user id, or NULL for the default.
 * \param pool_name RADOS pool (required).
 * \param config Flat NULL-terminated key/value pairs, or NULL.
 * \param rbd_name Image name within the pool (required).
 * \param block_size Logical block size for the exported bdev.
 * \param cluster_name Registered shared cluster to use, or NULL for a
 *        private connection.
 * \param uuid Optional fixed UUID; generated when NULL.
 * \return 0 on success, negative errno-style code on failure.
 *
 * Fix: the mutex-init error log passed rbd->disk.name to %p — disk.name is
 * still NULL at that point and passing a char * where %p expects void * is
 * a printf format/argument mismatch. It now logs the rbd pointer itself.
 */
int
bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
		const char *pool_name,
		const char *const *config,
		const char *rbd_name,
		uint32_t block_size,
		const char *cluster_name,
		const struct spdk_uuid *uuid)
{
	struct bdev_rbd *rbd;
	int ret;

	if ((pool_name == NULL) || (rbd_name == NULL)) {
		return -EINVAL;
	}

	rbd = calloc(1, sizeof(struct bdev_rbd));
	if (rbd == NULL) {
		SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
		return -ENOMEM;
	}

	ret = pthread_mutex_init(&rbd->mutex, NULL);
	if (ret) {
		SPDK_ERRLOG("Cannot init mutex on rbd=%p\n", rbd);
		free(rbd);
		return ret;
	}

	rbd->rbd_name = strdup(rbd_name);
	if (!rbd->rbd_name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	if (user_id) {
		rbd->user_id = strdup(user_id);
		if (!rbd->user_id) {
			bdev_rbd_free(rbd);
			return -ENOMEM;
		}
	}

	if (cluster_name) {
		rbd->cluster_name = strdup(cluster_name);
		if (!rbd->cluster_name) {
			bdev_rbd_free(rbd);
			return -ENOMEM;
		}
	}

	rbd->pool_name = strdup(pool_name);
	if (!rbd->pool_name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	if (config && !(rbd->config = bdev_rbd_dup_config(config))) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}

	/* Connects the cluster and fills rbd->info via rbd_stat(). */
	ret = bdev_rbd_init(rbd);
	if (ret < 0) {
		bdev_rbd_free(rbd);
		SPDK_ERRLOG("Failed to init rbd device\n");
		return ret;
	}

	if (uuid) {
		rbd->disk.uuid = *uuid;
	} else {
		spdk_uuid_generate(&rbd->disk.uuid);
	}

	if (name) {
		rbd->disk.name = strdup(name);
	} else {
		rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
	}
	if (!rbd->disk.name) {
		bdev_rbd_free(rbd);
		return -ENOMEM;
	}
	rbd->disk.product_name = "Ceph Rbd Disk";
	bdev_rbd_count++;

	rbd->disk.write_cache = 0;
	rbd->disk.blocklen = block_size;
	/* Size comes from the rbd_stat() result cached by bdev_rbd_init(). */
	rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
	rbd->disk.ctxt = rbd;
	rbd->disk.fn_table = &rbd_fn_table;
	rbd->disk.module = &rbd_if;

	SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);

	spdk_io_device_register(rbd, bdev_rbd_create_cb,
				bdev_rbd_destroy_cb,
				sizeof(struct bdev_rbd_io_channel),
				rbd_name);
	ret = spdk_bdev_register(&rbd->disk);
	if (ret) {
		spdk_io_device_unregister(rbd, NULL);
		bdev_rbd_free(rbd);
		return ret;
	}

	*bdev = &(rbd->disk);

	return ret;
}
/* Unregister the named rbd bdev; cb_fn is invoked with the result, including
 * immediately on lookup failure.
 */
void
bdev_rbd_delete(const char *name, spdk_delete_rbd_complete cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &rbd_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}
/* No-op event callback for the short-lived descriptor used by resize. */
static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}
/* Grow the named rbd bdev to new_size_in_mb (shrinking is rejected).
 * Resizes the backing image via rbd_resize(), then tells the bdev layer the
 * new block count. Returns 0 on success, negative errno on failure.
 */
int
bdev_rbd_resize(const char *name, const uint64_t new_size_in_mb)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct spdk_io_channel *ch;
	struct bdev_rbd_io_channel *rbd_io_ch;
	int rc = 0;
	uint64_t new_size_in_byte;
	uint64_t current_size_in_mb;

	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);

	if (bdev->module != &rbd_if) {
		rc = -EINVAL;
		goto exit;
	}

	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
	if (current_size_in_mb > new_size_in_mb) {
		SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
		rc = -EINVAL;
		goto exit;
	}

	/* NOTE(review): passing the spdk_bdev pointer where bdev_rbd is expected
	 * appears to rely on "disk" being the first member of struct bdev_rbd,
	 * making the two pointers alias — confirm against the struct layout. */
	ch = bdev_rbd_get_io_channel(bdev);
	rbd_io_ch = spdk_io_channel_get_ctx(ch);
	new_size_in_byte = new_size_in_mb * 1024 * 1024;

	rc = rbd_resize(rbd_io_ch->disk->image, new_size_in_byte);
	spdk_put_io_channel(ch);
	if (rc != 0) {
		SPDK_ERRLOG("failed to resize the ceph bdev.\n");
		goto exit;
	}

	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
	if (rc != 0) {
		SPDK_ERRLOG("failed to notify block cnt change.\n");
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}
/* Channel-create callback for the module-wide group io device; no state. */
static int
bdev_rbd_group_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}
/* Matching no-op destroy callback for the group io device. */
static void
bdev_rbd_group_destroy_cb(void *io_device, void *ctx_buf)
{
}
/* Module init: register the module-wide io device keyed on &rbd_if. */
static int
bdev_rbd_library_init(void)
{
	spdk_io_device_register(&rbd_if, bdev_rbd_group_create_cb, bdev_rbd_group_destroy_cb,
				0, "bdev_rbd_poll_groups");
	return 0;
}
/* Module teardown: unregister the module-wide io device. */
static void
bdev_rbd_library_fini(void)
{
	spdk_io_device_unregister(&rbd_if, NULL);
}
SPDK_LOG_REGISTER_COMPONENT(bdev_rbd)