diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h
index ec453147b..68010db84 100644
--- a/include/spdk/bdev.h
+++ b/include/spdk/bdev.h
@@ -44,6 +44,7 @@
 #include "spdk/nvme_spec.h"
 #include "spdk/json.h"
 #include "spdk/queue.h"
+#include "spdk/histogram_data.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -1078,6 +1079,34 @@ void spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
  */
 void spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp);
 
+typedef void (*spdk_bdev_histogram_status_cb)(void *cb_arg, int status);
+typedef void (*spdk_bdev_histogram_data_cb)(void *cb_arg, int status,
+		struct spdk_histogram_data *histogram);
+
+/**
+ * Enable or disable collecting histogram data on the I/O channels of a bdev.
+ *
+ * \param bdev Block device.
+ * \param cb_fn Called on completion: 0, -EAGAIN (request already running) or -ENOMEM.
+ * \param cb_arg Argument to pass to cb_fn.
+ * \param enable true to allocate per-channel histograms, false to free them.
+ */
+void spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
+				void *cb_arg, bool enable);
+
+/**
+ * Get aggregated histogram data from a bdev. Each channel's histogram is merged
+ * into the caller-supplied histogram, which is then handed to the callback.
+ *
+ * \param bdev Block device.
+ * \param histogram Caller-owned histogram that receives the merged per-channel data.
+ * \param cb_fn Called with 0 or a negative errno, plus the histogram (NULL on -ENOMEM).
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
+			     spdk_bdev_histogram_data_cb cb_fn,
+			     void *cb_arg);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h
index afde316ad..296c6059f 100644
--- a/include/spdk/bdev_module.h
+++ b/include/spdk/bdev_module.h
@@ -356,6 +356,10 @@ struct spdk_bdev {
 
 		/** accumulated I/O statistics for previously deleted channels of this bdev */
 		struct spdk_bdev_io_stat stat;
+
+		/** histogram collection is enabled on this bdev (new channels allocate a histogram) */
+		bool histogram_enabled;
+		bool histogram_in_progress;	/**< an enable/disable request is still running */
 	} internal;
 };
 
diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c
index 000fddfb1..76d501774 100644
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -250,6 +250,8 @@ struct spdk_bdev_channel {
 
 	uint32_t flags;
 
+	struct spdk_histogram_data *histogram;
+
 #ifdef SPDK_CONFIG_VTUNE
 	uint64_t start_tsc;
 	uint64_t interval_tsc;
@@ -1854,6 +1856,14 @@ spdk_bdev_channel_create(void *io_device, void *ctx_buf)
 		return -1;
 	}
 
+	assert(ch->histogram == NULL);
+	if (bdev->internal.histogram_enabled) {
+		ch->histogram = spdk_histogram_data_alloc();
+		if (ch->histogram == NULL) {	/* non-fatal: channel creation continues */
+			SPDK_ERRLOG("Could not allocate histogram\n");
+		}
+	}
+
 	mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
 	if (!mgmt_io_ch) {
 		spdk_put_io_channel(ch->channel);
@@ -2077,6 +2087,10 @@ spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
 	_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_small, ch);
 	_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_large, ch);
 
+	if (ch->histogram) {
+		spdk_histogram_data_free(ch->histogram);
+	}
+
 	_spdk_bdev_channel_destroy_resource(ch);
 }
 
@@ -3096,6 +3110,10 @@ _spdk_bdev_io_complete(void *ctx)
 	tsc_diff = tsc - bdev_io->internal.submit_tsc;
 	spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)bdev_io, 0);
 
+	if (bdev_io->internal.ch->histogram) {
+		spdk_histogram_data_tally(bdev_io->internal.ch->histogram, tsc_diff);
+	}
+
 	if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
 		switch (bdev_io->type) {
 		case SPDK_BDEV_IO_TYPE_READ:
@@ -4148,6 +4166,171 @@ spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
 	pthread_mutex_unlock(&bdev->internal.mutex);
 }
 
+struct spdk_bdev_histogram_ctx {
+	spdk_bdev_histogram_status_cb cb_fn;
+	void *cb_arg;
+	struct spdk_bdev *bdev;
+	int status;	/* result reported to cb_fn */
+};
+
+static void
+_spdk_bdev_histogram_disable_channel_cb(struct spdk_io_channel_iter *i, int status)
+{
+	struct spdk_bdev_histogram_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+	pthread_mutex_lock(&ctx->bdev->internal.mutex);
+	ctx->bdev->internal.histogram_in_progress = false;
+	pthread_mutex_unlock(&ctx->bdev->internal.mutex);
+	ctx->cb_fn(ctx->cb_arg, ctx->status);	/* ctx->status, not the iteration status */
+	free(ctx);
+}
+
+static void
+_spdk_bdev_histogram_disable_channel(struct spdk_io_channel_iter *i)
+{
+	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
+
+	if (ch->histogram != NULL) {
+		spdk_histogram_data_free(ch->histogram);
+		ch->histogram = NULL;
+	}
+	spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_spdk_bdev_histogram_enable_channel_cb(struct spdk_io_channel_iter *i, int status)
+{
+	struct spdk_bdev_histogram_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+	if (status != 0) {	/* enabling failed: undo it on all channels */
+		ctx->status = status;
+		ctx->bdev->internal.histogram_enabled = false;
+		spdk_for_each_channel(__bdev_to_io_dev(ctx->bdev), _spdk_bdev_histogram_disable_channel, ctx,
+				      _spdk_bdev_histogram_disable_channel_cb);
+	} else {
+		pthread_mutex_lock(&ctx->bdev->internal.mutex);
+		ctx->bdev->internal.histogram_in_progress = false;
+		pthread_mutex_unlock(&ctx->bdev->internal.mutex);
+		ctx->cb_fn(ctx->cb_arg, ctx->status);
+		free(ctx);
+	}
+}
+
+static void
+_spdk_bdev_histogram_enable_channel(struct spdk_io_channel_iter *i)
+{
+	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
+	int status = 0;
+
+	if (ch->histogram == NULL) {	/* keep a histogram the channel already has */
+		ch->histogram = spdk_histogram_data_alloc();
+		if (ch->histogram == NULL) {
+			status = -ENOMEM;
+		}
+	}
+
+	spdk_for_each_channel_continue(i, status);
+}
+
+void
+spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
+			   void *cb_arg, bool enable)
+{
+	struct spdk_bdev_histogram_ctx *ctx;
+
+	ctx = calloc(1, sizeof(struct spdk_bdev_histogram_ctx));
+	if (ctx == NULL) {
+		cb_fn(cb_arg, -ENOMEM);
+		return;
+	}
+
+	ctx->bdev = bdev;
+	ctx->status = 0;
+	ctx->cb_fn = cb_fn;
+	ctx->cb_arg = cb_arg;
+
+	pthread_mutex_lock(&bdev->internal.mutex);
+	if (bdev->internal.histogram_in_progress) {	/* only one request at a time */
+		pthread_mutex_unlock(&bdev->internal.mutex);
+		free(ctx);
+		cb_fn(cb_arg, -EAGAIN);
+		return;
+	}
+
+	bdev->internal.histogram_in_progress = true;
+	pthread_mutex_unlock(&bdev->internal.mutex);
+
+	bdev->internal.histogram_enabled = enable;
+
+	if (enable) {
+		/* Allocate a histogram on each channel; a failure triggers a rollback */
+		spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_enable_channel, ctx,
+				      _spdk_bdev_histogram_enable_channel_cb);
+	} else {
+		spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_disable_channel, ctx,
+				      _spdk_bdev_histogram_disable_channel_cb);
+	}
+}
+
+struct spdk_bdev_histogram_data_ctx {
+	spdk_bdev_histogram_data_cb cb_fn;
+	void *cb_arg;
+	struct spdk_bdev *bdev;
+	/** caller-supplied histogram that each channel's data is merged into */
+	struct spdk_histogram_data *histogram;
+};
+
+static void
+_spdk_bdev_histogram_get_channel_cb(struct spdk_io_channel_iter *i, int status)
+{
+	struct spdk_bdev_histogram_data_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+	ctx->cb_fn(ctx->cb_arg, status, ctx->histogram);
+	free(ctx);	/* only the context is freed; the histogram stays with the caller */
+}
+
+static void
+_spdk_bdev_histogram_get_channel(struct spdk_io_channel_iter *i)
+{
+	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+	struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
+	struct spdk_bdev_histogram_data_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+	int status = 0;
+
+	if (ch->histogram == NULL) {
+		status = -EFAULT;	/* this channel has no histogram to merge */
+	} else {
+		spdk_histogram_data_merge(ctx->histogram, ch->histogram);
+	}
+
+	spdk_for_each_channel_continue(i, status);
+}
+
+void
+spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
+			spdk_bdev_histogram_data_cb cb_fn,
+			void *cb_arg)
+{
+	struct spdk_bdev_histogram_data_ctx *ctx;
+
+	ctx = calloc(1, sizeof(struct spdk_bdev_histogram_data_ctx));
+	if (ctx == NULL) {
+		cb_fn(cb_arg, -ENOMEM, NULL);
+		return;
+	}
+
+	ctx->bdev = bdev;
+	ctx->cb_fn = cb_fn;
+	ctx->cb_arg = cb_arg;
+
+	ctx->histogram = histogram;	/* merged into as-is; not reset by this function */
+
+	spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_histogram_get_channel, ctx,
+			      _spdk_bdev_histogram_get_channel_cb);
+}
+
 SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV)
 
 SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)