bdev/rbd: Do not submit IOs through thread sending.
Currently, we send IOs to the main_td thread. It is not needed, because all the read/write functions provided by librbd are thread safe, so we can eliminate the thread send messaging policy for read/write related functions. And with this patch, users can observe the load balance distribution of I/Os on each CPU core owned by spdk applications through spdk_top tool. In this patch, we did the following work: 1 Move rbd_open when create the bdev since we will create once. 2 Simplify the channel management. 3 Do not use thread send messaging to do the read/write I/Os. According to our experiment results showed in https://github.com/spdk/spdk/issues/2204 There will be more than 15% performance improvment in IOPS aspect for different write I/O patterns, and it also addresses the I/O Load balance issues. Fixes issue: #2204 Change-Id: I9d2851c3d772261c131f9678f4b1bf722328aabb Signed-off-by: Ziye Yang <ziye.yang@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17644 Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
3a5ebfb06d
commit
c441e27023
@ -38,11 +38,8 @@ struct bdev_rbd {
|
|||||||
rbd_image_t image;
|
rbd_image_t image;
|
||||||
|
|
||||||
rbd_image_info_t info;
|
rbd_image_info_t info;
|
||||||
pthread_mutex_t mutex;
|
|
||||||
struct spdk_thread *main_td;
|
struct spdk_thread *main_td;
|
||||||
struct spdk_thread *destruct_td;
|
struct spdk_thread *destruct_td;
|
||||||
uint32_t ch_count;
|
|
||||||
struct spdk_io_channel *group_ch;
|
|
||||||
|
|
||||||
TAILQ_ENTRY(bdev_rbd) tailq;
|
TAILQ_ENTRY(bdev_rbd) tailq;
|
||||||
struct spdk_poller *reset_timer;
|
struct spdk_poller *reset_timer;
|
||||||
@ -51,6 +48,7 @@ struct bdev_rbd {
|
|||||||
|
|
||||||
struct bdev_rbd_io_channel {
|
struct bdev_rbd_io_channel {
|
||||||
struct bdev_rbd *disk;
|
struct bdev_rbd *disk;
|
||||||
|
struct spdk_io_channel *group_ch;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bdev_rbd_io {
|
struct bdev_rbd_io {
|
||||||
@ -124,6 +122,11 @@ bdev_rbd_free(struct bdev_rbd *rbd)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rbd->image) {
|
||||||
|
rbd_flush(rbd->image);
|
||||||
|
rbd_close(rbd->image);
|
||||||
|
}
|
||||||
|
|
||||||
free(rbd->disk.name);
|
free(rbd->disk.name);
|
||||||
free(rbd->rbd_name);
|
free(rbd->rbd_name);
|
||||||
free(rbd->user_id);
|
free(rbd->user_id);
|
||||||
@ -141,7 +144,6 @@ bdev_rbd_free(struct bdev_rbd *rbd)
|
|||||||
rados_shutdown(rbd->cluster);
|
rados_shutdown(rbd->cluster);
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_destroy(&rbd->mutex);
|
|
||||||
free(rbd);
|
free(rbd);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -300,7 +302,6 @@ bdev_rbd_init_context(void *arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
|
rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
|
||||||
rbd_close(rbd->image);
|
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
SPDK_ERRLOG("Failed to stat specified rbd device\n");
|
SPDK_ERRLOG("Failed to stat specified rbd device\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -336,14 +337,9 @@ bdev_rbd_init(struct bdev_rbd *rbd)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
rbd->main_td = spdk_get_thread();
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
return ret;
|
||||||
bdev_rbd_exit(rbd_image_t image)
|
|
||||||
{
|
|
||||||
rbd_flush(image);
|
|
||||||
rbd_close(image);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -534,7 +530,6 @@ _bdev_rbd_destruct_done(void *io_device)
|
|||||||
struct bdev_rbd *rbd = io_device;
|
struct bdev_rbd *rbd = io_device;
|
||||||
|
|
||||||
assert(rbd != NULL);
|
assert(rbd != NULL);
|
||||||
assert(rbd->ch_count == 0);
|
|
||||||
|
|
||||||
spdk_bdev_destruct_done(&rbd->disk, 0);
|
spdk_bdev_destruct_done(&rbd->disk, 0);
|
||||||
bdev_rbd_free(rbd);
|
bdev_rbd_free(rbd);
|
||||||
@ -593,14 +588,12 @@ static void
|
|||||||
bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
||||||
bool success)
|
bool success)
|
||||||
{
|
{
|
||||||
struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;
|
|
||||||
|
|
||||||
if (!success) {
|
if (!success) {
|
||||||
bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
spdk_thread_exec_msg(disk->main_td, bdev_rbd_start_aio, bdev_io);
|
bdev_rbd_start_aio(bdev_io);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -621,7 +614,7 @@ bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io
|
|||||||
case SPDK_BDEV_IO_TYPE_UNMAP:
|
case SPDK_BDEV_IO_TYPE_UNMAP:
|
||||||
case SPDK_BDEV_IO_TYPE_FLUSH:
|
case SPDK_BDEV_IO_TYPE_FLUSH:
|
||||||
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
|
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
|
||||||
spdk_thread_exec_msg(disk->main_td, bdev_rbd_start_aio, bdev_io);
|
bdev_rbd_start_aio(bdev_io);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SPDK_BDEV_IO_TYPE_RESET:
|
case SPDK_BDEV_IO_TYPE_RESET:
|
||||||
@ -652,121 +645,25 @@ bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
bdev_rbd_free_channel_resources(struct bdev_rbd *disk)
|
|
||||||
{
|
|
||||||
assert(disk != NULL);
|
|
||||||
assert(disk->main_td == spdk_get_thread());
|
|
||||||
assert(disk->ch_count == 0);
|
|
||||||
|
|
||||||
spdk_put_io_channel(disk->group_ch);
|
|
||||||
if (disk->image) {
|
|
||||||
bdev_rbd_exit(disk->image);
|
|
||||||
}
|
|
||||||
|
|
||||||
disk->main_td = NULL;
|
|
||||||
disk->group_ch = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void *
|
|
||||||
bdev_rbd_handle(void *arg)
|
|
||||||
{
|
|
||||||
struct bdev_rbd *disk = arg;
|
|
||||||
void *ret = arg;
|
|
||||||
|
|
||||||
if (rbd_open(disk->io_ctx, disk->rbd_name, &disk->image, NULL) < 0) {
|
|
||||||
SPDK_ERRLOG("Failed to open specified rbd device\n");
|
|
||||||
ret = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
_bdev_rbd_create_cb(struct bdev_rbd *disk)
|
|
||||||
{
|
|
||||||
disk->group_ch = spdk_get_io_channel(&rbd_if);
|
|
||||||
assert(disk->group_ch != NULL);
|
|
||||||
|
|
||||||
if (spdk_call_unaffinitized(bdev_rbd_handle, disk) == NULL) {
|
|
||||||
bdev_rbd_free_channel_resources(disk);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
bdev_rbd_create_cb(void *io_device, void *ctx_buf)
|
bdev_rbd_create_cb(void *io_device, void *ctx_buf)
|
||||||
{
|
{
|
||||||
struct bdev_rbd_io_channel *ch = ctx_buf;
|
struct bdev_rbd_io_channel *ch = ctx_buf;
|
||||||
struct bdev_rbd *disk = io_device;
|
struct bdev_rbd *disk = io_device;
|
||||||
int rc;
|
|
||||||
|
|
||||||
ch->disk = disk;
|
ch->disk = disk;
|
||||||
pthread_mutex_lock(&disk->mutex);
|
ch->group_ch = spdk_get_io_channel(&rbd_if);
|
||||||
if (disk->ch_count == 0) {
|
assert(ch->group_ch != NULL);
|
||||||
assert(disk->main_td == NULL);
|
|
||||||
rc = _bdev_rbd_create_cb(disk);
|
|
||||||
if (rc) {
|
|
||||||
SPDK_ERRLOG("Cannot create channel for disk=%p\n", disk);
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
disk->main_td = spdk_get_thread();
|
|
||||||
}
|
|
||||||
|
|
||||||
disk->ch_count++;
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
_bdev_rbd_destroy_cb(void *ctx)
|
|
||||||
{
|
|
||||||
struct bdev_rbd *disk = ctx;
|
|
||||||
|
|
||||||
pthread_mutex_lock(&disk->mutex);
|
|
||||||
assert(disk->ch_count > 0);
|
|
||||||
disk->ch_count--;
|
|
||||||
|
|
||||||
if (disk->ch_count > 0) {
|
|
||||||
/* A new channel was created between when message was sent and this function executed */
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bdev_rbd_free_channel_resources(disk);
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
|
bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
|
||||||
{
|
{
|
||||||
struct bdev_rbd *disk = io_device;
|
struct bdev_rbd_io_channel *ch = ctx_buf;
|
||||||
struct spdk_thread *thread;
|
|
||||||
|
|
||||||
pthread_mutex_lock(&disk->mutex);
|
spdk_put_io_channel(ch->group_ch);
|
||||||
assert(disk->ch_count > 0);
|
|
||||||
disk->ch_count--;
|
|
||||||
if (disk->ch_count == 0) {
|
|
||||||
assert(disk->main_td != NULL);
|
|
||||||
if (disk->main_td != spdk_get_thread()) {
|
|
||||||
/* The final channel was destroyed on a different thread
|
|
||||||
* than where the first channel was created. Pass a message
|
|
||||||
* to the main thread to unregister the poller. */
|
|
||||||
disk->ch_count++;
|
|
||||||
thread = disk->main_td;
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
spdk_thread_send_msg(thread, _bdev_rbd_destroy_cb, disk);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bdev_rbd_free_channel_resources(disk);
|
|
||||||
}
|
|
||||||
pthread_mutex_unlock(&disk->mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct spdk_io_channel *
|
static struct spdk_io_channel *
|
||||||
@ -1177,13 +1074,6 @@ bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = pthread_mutex_init(&rbd->mutex, NULL);
|
|
||||||
if (ret) {
|
|
||||||
SPDK_ERRLOG("Cannot init mutex on rbd=%p\n", rbd->disk.name);
|
|
||||||
free(rbd);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
rbd->rbd_name = strdup(rbd_name);
|
rbd->rbd_name = strdup(rbd_name);
|
||||||
if (!rbd->rbd_name) {
|
if (!rbd->rbd_name) {
|
||||||
bdev_rbd_free(rbd);
|
bdev_rbd_free(rbd);
|
||||||
|
Loading…
Reference in New Issue
Block a user