bdev/rbd: Revise bdev_rbd_create rpc function.

Revise the bdev_rbd_create RPC call to add an optional
parameter "--cluster_name", e.g., "--cluster_name Rados".

Users can then create an rbd bdev on top of an already
registered Rados cluster. Sharing the cluster this way
removes the thread creation overhead when multiple rbd
bdevs are connected to the same Ceph cluster.

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: Ide5800f8fc6b2074805272a59731c666fe279b9a
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7584
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Ziye Yang 2021-04-22 21:51:42 +08:00 committed by Tomasz Zawadzki
parent 4e4d865fa0
commit 5c0160263c
8 changed files with 196 additions and 20 deletions

View File

@ -23,6 +23,9 @@ New RPC `bdev_rbd_register_cluster` and `bdev_rbd_unregister_cluster` was added,
and delete the rados object cluster, then users can choose the cluster to create related rbd and delete the rados object cluster, then users can choose the cluster to create related rbd
device. device.
Revised the `bdev_rbd_create` RPC: it now accepts an optional `--cluster_name` parameter
to create an rbd bdev on top of an already registered Rados cluster object.
## v21.04: ## v21.04:
### accel ### accel

View File

@ -3421,6 +3421,7 @@ pool_name | Required | string | Pool name
rbd_name | Required | string | Image name rbd_name | Required | string | Image name
block_size | Required | number | Block size block_size | Required | number | Block size
config | Optional | string map | Explicit librados configuration config | Optional | string map | Explicit librados configuration
cluster_name | Optional | string | Rados cluster object name created in this module.
If no config is specified, Ceph configuration files must exist with If no config is specified, Ceph configuration files must exist with
all relevant settings for accessing the pool. If a config map is all relevant settings for accessing the pool. If a config map is
@ -3432,6 +3433,10 @@ secret key stored in Ceph keyrings) are enough.
When accessing the image as some user other than "admin" (the When accessing the image as some user other than "admin" (the
default), the "user_id" has to be set. default), the "user_id" has to be set.
If the cluster_name option is provided, the bdev uses the Rados cluster object
referenced by that name (created by the bdev_rbd_register_cluster RPC) and ignores
the "user_id + config" combination that would otherwise be used to create its own Rados cluster.
### Result ### Result
Name of newly created bdev. Name of newly created bdev.
@ -3468,6 +3473,33 @@ response:
} }
~~~ ~~~
Example request with `cluster_name`:
~~~
{
"params": {
"pool_name": "rbd",
"rbd_name": "foo",
"block_size": 4096,
"cluster_name": "rbd_cluster"
},
"jsonrpc": "2.0",
"method": "bdev_rbd_create",
"id": 1
}
~~~
Example response:
~~~
response:
{
"jsonrpc": "2.0",
"id": 1,
"result": "Ceph0"
}
~~~
## bdev_rbd_delete {#rpc_bdev_rbd_delete} ## bdev_rbd_delete {#rpc_bdev_rbd_delete}
Delete @ref bdev_config_rbd bdev Delete @ref bdev_config_rbd bdev

View File

@ -63,6 +63,8 @@ struct bdev_rbd {
char *pool_name; char *pool_name;
char **config; char **config;
rados_t cluster; rados_t cluster;
rados_t *cluster_p;
char *cluster_name;
rbd_image_info_t info; rbd_image_info_t info;
TAILQ_ENTRY(bdev_rbd) tailq; TAILQ_ENTRY(bdev_rbd) tailq;
struct spdk_poller *reset_timer; struct spdk_poller *reset_timer;
@ -112,6 +114,35 @@ bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry)
free(entry); free(entry);
} }
/*
 * Release one reference on a registered (shared) Rados cluster.
 *
 * cluster: address of the caller's pointer to the shared rados_t. Must not be
 *          NULL. If the pointed-to value is NULL there is nothing to release.
 *
 * The registered cluster list is searched, under g_map_bdev_rbd_cluster_mutex,
 * for the entry whose embedded rados_t is the one referenced by *cluster. When
 * found, the entry's reference count is decremented and *cluster is reset to
 * NULL. When no entry matches, an error is logged and *cluster is left as is.
 */
static void
bdev_rbd_put_cluster(rados_t **cluster)
{
	struct bdev_rbd_cluster *iter;
	struct bdev_rbd_cluster *found = NULL;

	assert(cluster != NULL);

	/* Nothing to look up when *cluster is NULL. */
	if (*cluster == NULL) {
		return;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(iter, &g_map_bdev_rbd_cluster, link) {
		if (*cluster == &iter->cluster) {
			found = iter;
			break;
		}
	}

	if (found != NULL) {
		/* Drop the reference while still holding the map lock. */
		assert(found->ref > 0);
		found->ref--;
		*cluster = NULL;
	}
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	if (found == NULL) {
		SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster);
	}
}
static void static void
bdev_rbd_free(struct bdev_rbd *rbd) bdev_rbd_free(struct bdev_rbd *rbd)
{ {
@ -119,15 +150,19 @@ bdev_rbd_free(struct bdev_rbd *rbd)
return; return;
} }
if (rbd->cluster) {
rados_shutdown(rbd->cluster);
}
free(rbd->disk.name); free(rbd->disk.name);
free(rbd->rbd_name); free(rbd->rbd_name);
free(rbd->user_id); free(rbd->user_id);
free(rbd->pool_name); free(rbd->pool_name);
bdev_rbd_free_config(rbd->config); bdev_rbd_free_config(rbd->config);
if (rbd->cluster_name) {
bdev_rbd_put_cluster(&rbd->cluster_p);
free(rbd->cluster_name);
} else if (rbd->cluster) {
rados_shutdown(rbd->cluster);
}
free(rbd); free(rbd);
} }
@ -209,11 +244,49 @@ bdev_rados_cluster_init(const char *user_id, const char *const *config,
return 0; return 0;
} }
/*
 * Look up a registered Rados cluster by name and take a reference on it.
 *
 * cluster_name: name of the registered cluster to search for.
 * cluster:      output; on success it is set to the address of the rados_t
 *               embedded in the matching registry entry.
 *
 * Returns 0 when a matching entry was found (its reference count has been
 * incremented), or -1 when an argument is NULL or no entry has that name.
 */
static int
bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster)
{
	struct bdev_rbd_cluster *entry;
	int rc = -1;

	if (cluster == NULL) {
		SPDK_ERRLOG("cluster should not be NULL\n");
		return -1;
	}

	if (cluster_name == NULL) {
		SPDK_ERRLOG("cluster_name should not be NULL\n");
		return -1;
	}

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		/*
		 * Require an exact name match. Comparing only the first
		 * strlen(entry->name) bytes would let a request for "foobar"
		 * bind to a registered cluster named "foo".
		 */
		if (strcmp(cluster_name, entry->name) == 0) {
			entry->ref++;
			*cluster = &entry->cluster;
			rc = 0;
			break;
		}
	}
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);

	return rc;
}
/*
 * Bind to an already registered Rados cluster identified by cluster_name.
 *
 * cluster_name: name of the registered cluster.
 * cluster:      output pointer filled in by bdev_rbd_get_cluster().
 *
 * Returns the non-negative result of bdev_rbd_get_cluster() on success, or -1
 * (after logging an error) when the lookup fails.
 */
static int
bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster)
{
	int rc = bdev_rbd_get_cluster(cluster_name, cluster);

	if (rc >= 0) {
		return rc;
	}

	SPDK_ERRLOG("Failed to create rados_t struct\n");
	return -1;
}
static void * static void *
bdev_rbd_cluster_handle(void *arg) bdev_rbd_cluster_handle(void *arg)
{ {
struct bdev_rbd *rbd = arg;
void *ret = arg; void *ret = arg;
struct bdev_rbd *rbd = arg;
int rc; int rc;
rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config, rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config,
@ -234,14 +307,24 @@ bdev_rbd_init(struct bdev_rbd *rbd)
rados_ioctx_t io_ctx = NULL; rados_ioctx_t io_ctx = NULL;
rbd_image_t image = NULL; rbd_image_t image = NULL;
if (!rbd->cluster_name) {
rbd->cluster_p = &rbd->cluster;
/* Cluster should be created in non-SPDK thread to avoid conflict between /* Cluster should be created in non-SPDK thread to avoid conflict between
* Rados and SPDK thread */ * Rados and SPDK thread */
if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) { if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) {
SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd); SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd);
return -1; return -1;
} }
} else {
ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p);
if (ret < 0) {
SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n",
rbd, rbd->cluster_name);
return -1;
}
}
ret = rados_ioctx_create(rbd->cluster, rbd->pool_name, &io_ctx); ret = rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &io_ctx);
if (ret < 0) { if (ret < 0) {
SPDK_ERRLOG("Failed to create ioctx\n"); SPDK_ERRLOG("Failed to create ioctx\n");
return -1; return -1;
@ -514,9 +597,9 @@ bdev_rbd_handle(void *arg)
struct bdev_rbd_io_channel *ch = arg; struct bdev_rbd_io_channel *ch = arg;
void *ret = arg; void *ret = arg;
assert(ch->disk->cluster != NULL); assert(ch->disk->cluster_p != NULL);
if (rados_ioctx_create(ch->disk->cluster, ch->disk->pool_name, &ch->io_ctx) < 0) { if (rados_ioctx_create(*(ch->disk->cluster_p), ch->disk->pool_name, &ch->io_ctx) < 0) {
SPDK_ERRLOG("Failed to create ioctx\n"); SPDK_ERRLOG("Failed to create ioctx\n");
ret = NULL; ret = NULL;
return ret; return ret;
@ -602,6 +685,40 @@ bdev_rbd_get_io_channel(void *ctx)
return spdk_get_io_channel(rbd_bdev); return spdk_get_io_channel(rbd_bdev);
} }
/*
 * Write the settings of the registered cluster named cluster_name into the
 * JSON object currently open on w.
 *
 * Emits "user_id" when the entry has one, then either a "config_param" object
 * built from the entry's key/value string pairs or, when there is no
 * config_param, a "config_file" string if one is set. Nothing is written when
 * no registered cluster has that name.
 *
 * The lookup and the writes happen under g_map_bdev_rbd_cluster_mutex.
 */
static void
bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w)
{
	struct bdev_rbd_cluster *entry;

	pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
	STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
		/*
		 * Require an exact name match. Comparing only the first
		 * strlen(entry->name) bytes would dump the wrong cluster
		 * whenever one registered name is a prefix of another.
		 */
		if (strcmp(cluster_name, entry->name) != 0) {
			continue;
		}

		if (entry->user_id) {
			spdk_json_write_named_string(w, "user_id", entry->user_id);
		}

		if (entry->config_param) {
			/* config_param is walked as consecutive key/value pairs until a NULL key. */
			char **config_entry = entry->config_param;

			spdk_json_write_named_object_begin(w, "config_param");
			while (*config_entry) {
				spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
				config_entry += 2;
			}
			spdk_json_write_object_end(w);
		} else if (entry->config_file) {
			spdk_json_write_named_string(w, "config_file", entry->config_file);
		}

		/* Only the first entry with this name is dumped. */
		break;
	}
	pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
}
static int static int
bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{ {
@ -613,6 +730,11 @@ bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name); spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);
if (rbd_bdev->cluster_name) {
bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w);
goto end;
}
if (rbd_bdev->user_id) { if (rbd_bdev->user_id) {
spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id); spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
} }
@ -628,6 +750,7 @@ bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
spdk_json_write_object_end(w); spdk_json_write_object_end(w);
} }
end:
spdk_json_write_object_end(w); spdk_json_write_object_end(w);
return 0; return 0;
@ -842,7 +965,8 @@ bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
const char *pool_name, const char *pool_name,
const char *const *config, const char *const *config,
const char *rbd_name, const char *rbd_name,
uint32_t block_size) uint32_t block_size,
const char *cluster_name)
{ {
struct bdev_rbd *rbd; struct bdev_rbd *rbd;
int ret; int ret;
@ -871,6 +995,13 @@ bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
} }
} }
if (cluster_name) {
rbd->cluster_name = strdup(cluster_name);
if (!rbd->cluster_name) {
bdev_rbd_free(rbd);
return -ENOMEM;
}
}
rbd->pool_name = strdup(pool_name); rbd->pool_name = strdup(pool_name);
if (!rbd->pool_name) { if (!rbd->pool_name) {
bdev_rbd_free(rbd); bdev_rbd_free(rbd);

View File

@ -53,7 +53,7 @@ typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno);
int bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, int bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
const char *pool_name, const char *pool_name,
const char *const *config, const char *const *config,
const char *rbd_name, uint32_t block_size); const char *rbd_name, uint32_t block_size, const char *cluster_name);
/** /**
* Delete rbd bdev. * Delete rbd bdev.
* *

View File

@ -44,6 +44,7 @@ struct rpc_create_rbd {
char *rbd_name; char *rbd_name;
uint32_t block_size; uint32_t block_size;
char **config; char **config;
char *cluster_name;
}; };
static void static void
@ -54,6 +55,7 @@ free_rpc_create_rbd(struct rpc_create_rbd *req)
free(req->pool_name); free(req->pool_name);
free(req->rbd_name); free(req->rbd_name);
bdev_rbd_free_config(req->config); bdev_rbd_free_config(req->config);
free(req->cluster_name);
} }
static int static int
@ -104,7 +106,8 @@ static const struct spdk_json_object_decoder rpc_create_rbd_decoders[] = {
{"pool_name", offsetof(struct rpc_create_rbd, pool_name), spdk_json_decode_string}, {"pool_name", offsetof(struct rpc_create_rbd, pool_name), spdk_json_decode_string},
{"rbd_name", offsetof(struct rpc_create_rbd, rbd_name), spdk_json_decode_string}, {"rbd_name", offsetof(struct rpc_create_rbd, rbd_name), spdk_json_decode_string},
{"block_size", offsetof(struct rpc_create_rbd, block_size), spdk_json_decode_uint32}, {"block_size", offsetof(struct rpc_create_rbd, block_size), spdk_json_decode_uint32},
{"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true} {"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true},
{"cluster_name", offsetof(struct rpc_create_rbd, cluster_name), spdk_json_decode_string, true}
}; };
static void static void
@ -128,7 +131,7 @@ rpc_bdev_rbd_create(struct spdk_jsonrpc_request *request,
rc = bdev_rbd_create(&bdev, req.name, req.user_id, req.pool_name, rc = bdev_rbd_create(&bdev, req.name, req.user_id, req.pool_name,
(const char *const *)req.config, (const char *const *)req.config,
req.rbd_name, req.rbd_name,
req.block_size); req.block_size, req.cluster_name);
if (rc) { if (rc) {
spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
goto cleanup; goto cleanup;

View File

@ -676,7 +676,8 @@ if __name__ == "__main__":
config=config, config=config,
pool_name=args.pool_name, pool_name=args.pool_name,
rbd_name=args.rbd_name, rbd_name=args.rbd_name,
block_size=args.block_size)) block_size=args.block_size,
cluster_name=args.cluster_name))
p = subparsers.add_parser('bdev_rbd_create', aliases=['construct_rbd_bdev'], p = subparsers.add_parser('bdev_rbd_create', aliases=['construct_rbd_bdev'],
help='Add a bdev with ceph rbd backend') help='Add a bdev with ceph rbd backend')
@ -687,6 +688,7 @@ if __name__ == "__main__":
p.add_argument('pool_name', help='rbd pool name') p.add_argument('pool_name', help='rbd pool name')
p.add_argument('rbd_name', help='rbd image name') p.add_argument('rbd_name', help='rbd image name')
p.add_argument('block_size', help='rbd block size', type=int) p.add_argument('block_size', help='rbd block size', type=int)
p.add_argument('-c', '--cluster_name', help="cluster name to identify the Rados cluster", required=False)
p.set_defaults(func=bdev_rbd_create) p.set_defaults(func=bdev_rbd_create)
def bdev_rbd_delete(args): def bdev_rbd_delete(args):

View File

@ -694,7 +694,7 @@ def bdev_rbd_unregister_cluster(client, name):
@deprecated_alias('construct_rbd_bdev') @deprecated_alias('construct_rbd_bdev')
def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=None, config=None): def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=None, config=None, cluster_name=None):
"""Create a Ceph RBD block device. """Create a Ceph RBD block device.
Args: Args:
@ -704,6 +704,7 @@ def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=Non
name: name of block device (optional) name: name of block device (optional)
user: Ceph user name (optional) user: Ceph user name (optional)
config: map of config keys to values (optional) config: map of config keys to values (optional)
cluster_name: Name to identify Rados cluster (optional)
Returns: Returns:
Name of created block device. Name of created block device.
@ -720,6 +721,8 @@ def bdev_rbd_create(client, pool_name, rbd_name, block_size, name=None, user=Non
params['user_id'] = user params['user_id'] = user
if config is not None: if config is not None:
params['config'] = config params['config'] = config
if cluster_name is not None:
params['cluster_name'] = cluster_name
return client.call('bdev_rbd_create', params) return client.call('bdev_rbd_create', params)

View File

@ -31,7 +31,8 @@ timing_exit start_iscsi_tgt
$rpc_py iscsi_create_portal_group $PORTAL_TAG $TARGET_IP:$ISCSI_PORT $rpc_py iscsi_create_portal_group $PORTAL_TAG $TARGET_IP:$ISCSI_PORT
$rpc_py iscsi_create_initiator_group $INITIATOR_TAG $INITIATOR_NAME $NETMASK $rpc_py iscsi_create_initiator_group $INITIATOR_TAG $INITIATOR_NAME $NETMASK
rbd_bdev="$($rpc_py bdev_rbd_create $RBD_POOL $RBD_NAME 4096)" rbd_cluster_name="$($rpc_py bdev_rbd_register_cluster iscsi_rbd_cluster)"
rbd_bdev="$($rpc_py bdev_rbd_create $RBD_POOL $RBD_NAME 4096 -c $rbd_cluster_name)"
$rpc_py bdev_get_bdevs $rpc_py bdev_get_bdevs
$rpc_py bdev_rbd_resize $rbd_bdev 2000 $rpc_py bdev_rbd_resize $rbd_bdev 2000
@ -64,6 +65,7 @@ trap - SIGINT SIGTERM EXIT
iscsicleanup iscsicleanup
$rpc_py bdev_rbd_delete $rbd_bdev $rpc_py bdev_rbd_delete $rbd_bdev
$rpc_py bdev_rbd_unregister_cluster $rbd_cluster_name
killprocess $pid killprocess $pid
rbd_cleanup rbd_cleanup