From 7d030ef7fc9ec09d7e66a966eac0fcec5c1bd8bb Mon Sep 17 00:00:00 2001 From: GangCao Date: Tue, 4 Sep 2018 11:01:51 -0400 Subject: [PATCH] QoS/Bdev: add the QoS related structure and enumeration This patch is to introduce the specific QoS related structure and the enumeration for types of QoS rate limits. Later new types of QoS rate limits can be supported easily. Change-Id: Idb8d2e7627fd145bf2b0ddb296c968b6b068f48c Signed-off-by: GangCao Reviewed-on: https://review.gerrithub.io/424459 Chandler-Test-Pool: SPDK Automated Test System Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Ben Walker --- include/spdk/bdev.h | 40 ++- lib/bdev/bdev.c | 413 +++++++++++++++++-------- lib/bdev/rpc/bdev_rpc.c | 26 +- test/bdev/bdev.conf.in | 8 +- test/unit/lib/bdev/mt/bdev.c/bdev_ut.c | 61 +++- 5 files changed, 391 insertions(+), 157 deletions(-) diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index f75e80394..097cca18c 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -105,6 +105,16 @@ enum spdk_bdev_io_type { SPDK_BDEV_NUM_IO_TYPES /* Keep last */ }; +/** bdev QoS rate limit type */ +enum spdk_bdev_qos_rate_limit_type { + /** IOPS rate limit for both read and write */ + SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT = 0, + /** Byte per second rate limit for both read and write */ + SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT, + /** Keep last */ + SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES +}; + /** * Block device completion callback. * @@ -318,25 +328,35 @@ uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev); uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev); /** - * Get IOs per second of block device for the QoS rate limiting. + * Get the string of quality of service rate limit. * - * \param bdev Block device to query. - * \return IOs per second. - * - * Return 0 for no QoS enforced on the queried block device. + * \param type Type of rate limit to query. + * \return String of QoS type. 
*/ -uint64_t spdk_bdev_get_qos_ios_per_sec(struct spdk_bdev *bdev); +const char *spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type); /** - * Set an IOPS-based quality of service rate limit on a bdev. + * Get the quality of service rate limits on a bdev. + * + * \param bdev Block device to query. + * \param limits Pointer to the array that receives the QoS rate limits. + * + * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum. + */ +void spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits); + +/** + * Set the quality of service rate limits on a bdev. * * \param bdev Block device. - * \param ios_per_sec I/O per second limit. + * \param limits Pointer to the array holding the QoS rate limits; byte limits are in MB/s and are converted in place. * \param cb_fn Callback function to be called when the QoS limit has been updated. * \param cb_arg Argument to pass to cb_fn. + * + * The limits are ordered based on the @ref spdk_bdev_qos_rate_limit_type enum. + */ -void spdk_bdev_set_qos_limit_iops(struct spdk_bdev *bdev, uint64_t ios_per_sec, - void (*cb_fn)(void *cb_arg, int status), void *cb_arg); +void spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits, + void (*cb_fn)(void *cb_arg, int status), void *cb_arg); /** * Get minimum I/O buffer address alignment for a bdev. 
diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 37e8f4e2f..3435df32c 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -75,15 +75,11 @@ int __itt_init_ittlib(const char *, __itt_group_id); #define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1 #define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512 #define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 10000 -#define SPDK_BDEV_QOS_MIN_BW_IN_MB_PER_SEC 10 +#define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC (10 * 1024 * 1024) +#define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED UINT64_MAX -enum spdk_bdev_qos_type { - SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT = 0, - SPDK_BDEV_QOS_RW_BYTEPS_RATE_LIMIT, - SPDK_BDEV_QOS_NUM_TYPES /* Keep last */ -}; - -static const char *qos_type_str[SPDK_BDEV_QOS_NUM_TYPES] = {"Limit_IOPS", "Limit_BWPS"}; +static const char *qos_conf_type[] = {"Limit_IOPS", "Limit_BPS"}; +static const char *qos_rpc_type[] = {"qos_ios_per_sec"}; TAILQ_HEAD(spdk_bdev_list, spdk_bdev); @@ -126,14 +122,29 @@ static spdk_bdev_fini_cb g_fini_cb_fn = NULL; static void *g_fini_cb_arg = NULL; static struct spdk_thread *g_fini_thread = NULL; +struct spdk_bdev_qos_limit { + /** IOs or bytes allowed per second (i.e., 1s). */ + uint64_t limit; + + /** Remaining IOs or bytes allowed in current timeslice (e.g., 1ms). + * For remaining bytes, allowed to run negative if an I/O is submitted when + * some bytes are remaining, but the I/O is bigger than that amount. The + * excess will be deducted from the next timeslice. + */ + int64_t remaining_this_timeslice; + + /** Minimum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */ + uint32_t min_per_timeslice; + + /** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */ + uint32_t max_per_timeslice; +}; + struct spdk_bdev_qos { - /** Rate limit, in I/O per second */ - uint64_t iops_rate_limit; + /** Types of structure of rate limits. 
*/ + struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES]; - /** Rate limit, in byte per second */ - uint64_t byte_rate_limit; - - /** The channel that all I/O are funneled through */ + /** The channel that all I/O are funneled through. */ struct spdk_bdev_channel *ch; /** The thread on which the poller is running. */ @@ -148,24 +159,6 @@ struct spdk_bdev_qos { /** Timestamp of start of last timeslice. */ uint64_t last_timeslice; - /** Maximum allowed IOs to be issued in one timeslice (e.g., 1ms) and - * only valid for the master channel which manages the outstanding IOs. */ - uint64_t max_ios_per_timeslice; - - /** Maximum allowed bytes to be issued in one timeslice (e.g., 1ms) and - * only valid for the master channel which manages the outstanding IOs. */ - uint64_t max_byte_per_timeslice; - - /** Remaining IO allowed in current timeslice (e.g., 1ms) */ - uint64_t io_remaining_this_timeslice; - - /** Remaining bytes allowed in current timeslice (e.g., 1ms). - * Allowed to run negative if an I/O is submitted when some bytes are remaining, - * but the I/O is bigger than that amount. The excess will be deducted from the - * next timeslice. - */ - int64_t byte_remaining_this_timeslice; - /** Poller that processes queued I/O commands each time slice. 
*/ struct spdk_poller *poller; }; @@ -1029,13 +1022,44 @@ spdk_bdev_free_io(struct spdk_bdev_io *bdev_io) } } +static bool +_spdk_bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit) +{ + assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES); + + switch (limit) { + case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: + return true; + case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT: + return false; + case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES: + default: + return false; + } +} + +static bool +_spdk_bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return true; + default: + return false; + } +} + static uint64_t _spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io) { struct spdk_bdev *bdev = bdev_io->bdev; switch (bdev_io->type) { - case SPDK_BDEV_IO_TYPE_NVME_ADMIN: case SPDK_BDEV_IO_TYPE_NVME_IO: case SPDK_BDEV_IO_TYPE_NVME_IO_MD: return bdev_io->u.nvme_passthru.nbytes; @@ -1049,6 +1073,30 @@ _spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io) } } +static void +_spdk_bdev_qos_update_per_io(struct spdk_bdev_qos *qos, uint64_t io_size_in_byte) +{ + int i; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + switch (i) { + case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: + qos->rate_limits[i].remaining_this_timeslice--; + break; + case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT: + qos->rate_limits[i].remaining_this_timeslice -= io_size_in_byte; + break; + case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES: + default: + break; + } + } +} + static void _spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch) { @@ -1056,22 +1104,27 @@ _spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch) struct spdk_bdev *bdev = ch->bdev; struct spdk_bdev_qos *qos = bdev->internal.qos; 
struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + int i; + bool to_limit_io; + uint64_t io_size_in_byte; while (!TAILQ_EMPTY(&qos->queued)) { - if (qos->max_ios_per_timeslice > 0 && qos->io_remaining_this_timeslice == 0) { - break; - } - - if (qos->max_byte_per_timeslice > 0 && qos->byte_remaining_this_timeslice <= 0) { - break; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].max_per_timeslice > 0 && + (qos->rate_limits[i].remaining_this_timeslice <= 0)) { + return; + } } bdev_io = TAILQ_FIRST(&qos->queued); TAILQ_REMOVE(&qos->queued, bdev_io, internal.link); - qos->io_remaining_this_timeslice--; - qos->byte_remaining_this_timeslice -= _spdk_bdev_get_io_size_in_byte(bdev_io); ch->io_outstanding++; shared_resource->io_outstanding++; + to_limit_io = _spdk_bdev_qos_io_to_limit(bdev_io); + if (to_limit_io == true) { + io_size_in_byte = _spdk_bdev_get_io_size_in_byte(bdev_io); + _spdk_bdev_qos_update_per_io(qos, io_size_in_byte); + } bdev->fn_table->submit_request(ch->channel, bdev_io); } } @@ -1405,20 +1458,22 @@ spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) static void spdk_bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos) { - uint64_t max_ios_per_timeslice = 0, max_byte_per_timeslice = 0; + uint32_t max_per_timeslice = 0; + int i; - if (qos->iops_rate_limit > 0) { - max_ios_per_timeslice = qos->iops_rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC / - SPDK_SEC_TO_USEC; - qos->max_ios_per_timeslice = spdk_max(max_ios_per_timeslice, - SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE); - } + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + qos->rate_limits[i].max_per_timeslice = 0; + continue; + } - if (qos->byte_rate_limit > 0) { - max_byte_per_timeslice = qos->byte_rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC / - SPDK_SEC_TO_USEC; - qos->max_byte_per_timeslice = 
spdk_max(max_byte_per_timeslice, - SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE); + max_per_timeslice = qos->rate_limits[i].limit * + SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC; + + qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice, + qos->rate_limits[i].min_per_timeslice); + + qos->rate_limits[i].remaining_this_timeslice = qos->rate_limits[i].max_per_timeslice; } } @@ -1427,6 +1482,7 @@ spdk_bdev_channel_poll_qos(void *arg) { struct spdk_bdev_qos *qos = arg; uint64_t now = spdk_get_ticks(); + int i; if (now < (qos->last_timeslice + qos->timeslice_size)) { /* We received our callback earlier than expected - return @@ -1438,20 +1494,23 @@ spdk_bdev_channel_poll_qos(void *arg) } /* Reset for next round of rate limiting */ - qos->io_remaining_this_timeslice = 0; - /* We may have allowed the bytes to slightly overrun in the last timeslice. - * byte_remaining_this_timeslice is signed, so if it's negative here, we'll - * account for the overrun so that the next timeslice will be appropriately - * reduced. - */ - if (qos->byte_remaining_this_timeslice > 0) { - qos->byte_remaining_this_timeslice = 0; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + /* We may have allowed the IOs or bytes to slightly overrun in the last + * timeslice. remaining_this_timeslice is signed, so if it's negative + * here, we'll account for the overrun so that the next timeslice will + * be appropriately reduced. 
+ */ + if (qos->rate_limits[i].remaining_this_timeslice > 0) { + qos->rate_limits[i].remaining_this_timeslice = 0; + } } while (now >= (qos->last_timeslice + qos->timeslice_size)) { qos->last_timeslice += qos->timeslice_size; - qos->io_remaining_this_timeslice += qos->max_ios_per_timeslice; - qos->byte_remaining_this_timeslice += qos->max_byte_per_timeslice; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + qos->rate_limits[i].remaining_this_timeslice += + qos->rate_limits[i].max_per_timeslice; + } } _spdk_bdev_qos_io_submit(qos->ch); @@ -1492,7 +1551,8 @@ _spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch) static void _spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch) { - struct spdk_bdev_qos *qos = bdev->internal.qos; + struct spdk_bdev_qos *qos = bdev->internal.qos; + int i; /* Rate limiting on this bdev enabled */ if (qos) { @@ -1511,9 +1571,21 @@ _spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch) qos->thread = spdk_io_channel_get_thread(io_ch); TAILQ_INIT(&qos->queued); + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + qos->rate_limits[i].min_per_timeslice = + SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE; + } else { + qos->rate_limits[i].min_per_timeslice = + SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE; + } + + if (qos->rate_limits[i].limit == 0) { + qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + } + } spdk_bdev_qos_update_max_quota_per_timeslice(qos); - qos->io_remaining_this_timeslice = qos->max_ios_per_timeslice; - qos->byte_remaining_this_timeslice = qos->max_byte_per_timeslice; qos->timeslice_size = SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; qos->last_timeslice = spdk_get_ticks(); @@ -1670,6 +1742,8 @@ spdk_bdev_qos_channel_destroy(void *cb_arg) static int spdk_bdev_qos_destroy(struct spdk_bdev *bdev) { + int i; + /* * Cleanly shutting down the QoS poller is tricky, because * during 
the asynchronous operation the user could open @@ -1696,12 +1770,17 @@ spdk_bdev_qos_destroy(struct spdk_bdev *bdev) /* Zero out the key parts of the QoS structure */ new_qos->ch = NULL; new_qos->thread = NULL; - new_qos->max_ios_per_timeslice = 0; - new_qos->max_byte_per_timeslice = 0; - new_qos->io_remaining_this_timeslice = 0; - new_qos->byte_remaining_this_timeslice = 0; new_qos->poller = NULL; TAILQ_INIT(&new_qos->queued); + /* + * The limit member of spdk_bdev_qos_limit structure is not zeroed. + * It will be used later for the new QoS structure. + */ + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + new_qos->rate_limits[i].remaining_this_timeslice = 0; + new_qos->rate_limits[i].min_per_timeslice = 0; + new_qos->rate_limits[i].max_per_timeslice = 0; + } bdev->internal.qos = new_qos; @@ -1855,18 +1934,29 @@ spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) return bdev->blockcnt; } -uint64_t -spdk_bdev_get_qos_ios_per_sec(struct spdk_bdev *bdev) +const char * +spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type) { - uint64_t iops_rate_limit = 0; + return qos_rpc_type[type]; +} + +void +spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits) +{ + int i; + + memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES); pthread_mutex_lock(&bdev->internal.mutex); if (bdev->internal.qos) { - iops_rate_limit = bdev->internal.qos->iops_rate_limit; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (bdev->internal.qos->rate_limits[i].limit != + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + limits[i] = bdev->internal.qos->rate_limits[i].limit; + } + } } pthread_mutex_unlock(&bdev->internal.mutex); - - return iops_rate_limit; } size_t @@ -2966,28 +3056,39 @@ spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io) } static void -_spdk_bdev_qos_config_type(struct spdk_bdev *bdev, uint64_t qos_set, - enum spdk_bdev_qos_type qos_type) +_spdk_bdev_qos_config_limit(struct spdk_bdev *bdev, uint64_t *limits) { - 
uint64_t min_qos_set = 0; + uint64_t min_qos_set; + int i; - switch (qos_type) { - case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: - min_qos_set = SPDK_BDEV_QOS_MIN_IOS_PER_SEC; - break; - case SPDK_BDEV_QOS_RW_BYTEPS_RATE_LIMIT: - min_qos_set = SPDK_BDEV_QOS_MIN_BW_IN_MB_PER_SEC; - break; - default: - SPDK_ERRLOG("Unsupported QoS type.\n"); + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + break; + } + } + + if (i == SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) { + SPDK_ERRLOG("Invalid rate limits set.\n"); return; } - if (qos_set % min_qos_set) { - SPDK_ERRLOG("Assigned QoS %" PRIu64 " on bdev %s is not multiple of %lu\n", - qos_set, bdev->name, min_qos_set); - SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name); - return; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + min_qos_set = SPDK_BDEV_QOS_MIN_IOS_PER_SEC; + } else { + min_qos_set = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC; + } + + if (limits[i] == 0 || limits[i] % min_qos_set) { + SPDK_ERRLOG("Assigned limit %" PRIu64 " on bdev %s is not multiple of %" PRIu64 "\n", + limits[i], bdev->name, min_qos_set); + SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name); + return; + } } if (!bdev->internal.qos) { @@ -2998,20 +3099,12 @@ _spdk_bdev_qos_config_type(struct spdk_bdev *bdev, uint64_t qos_set, } } - switch (qos_type) { - case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: - bdev->internal.qos->iops_rate_limit = qos_set; - break; - case SPDK_BDEV_QOS_RW_BYTEPS_RATE_LIMIT: - bdev->internal.qos->byte_rate_limit = qos_set * 1024 * 1024; - break; - default: - break; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + bdev->internal.qos->rate_limits[i].limit = limits[i]; + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS type:%d set:%lu\n", + bdev->name, i, limits[i]); } - SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS 
type:%d set:%lu\n", - bdev->name, qos_type, qos_set); - return; } @@ -3020,18 +3113,21 @@ _spdk_bdev_qos_config(struct spdk_bdev *bdev) { struct spdk_conf_section *sp = NULL; const char *val = NULL; - uint64_t qos_set = 0; int i = 0, j = 0; + uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {}; + bool config_qos = false; sp = spdk_conf_find_section(NULL, "QoS"); if (!sp) { return; } - while (j < SPDK_BDEV_QOS_NUM_TYPES) { + while (j < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) { + limits[j] = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + i = 0; while (true) { - val = spdk_conf_section_get_nmval(sp, qos_type_str[j], i, 0); + val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 0); if (!val) { break; } @@ -3041,10 +3137,14 @@ _spdk_bdev_qos_config(struct spdk_bdev *bdev) continue; } - val = spdk_conf_section_get_nmval(sp, qos_type_str[j], i, 1); + val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 1); if (val) { - qos_set = strtoull(val, NULL, 10); - _spdk_bdev_qos_config_type(bdev, qos_set, j); + if (_spdk_bdev_qos_is_iops_rate_limit(j) == true) { + limits[j] = strtoull(val, NULL, 10); + } else { + limits[j] = strtoull(val, NULL, 10) * 1024 * 1024; + } + config_qos = true; } break; @@ -3053,6 +3153,10 @@ _spdk_bdev_qos_config(struct spdk_bdev *bdev) j++; } + if (config_qos == true) { + _spdk_bdev_qos_config_limit(bdev, limits); + } + return; } @@ -3597,7 +3701,7 @@ _spdk_bdev_disable_qos_msg(struct spdk_io_channel_iter *i) } static void -_spdk_bdev_update_qos_limit_iops_msg(void *cb_arg) +_spdk_bdev_update_qos_rate_limit_msg(void *cb_arg) { struct set_qos_limit_ctx *ctx = cb_arg; struct spdk_bdev *bdev = ctx->bdev; @@ -3631,17 +3735,59 @@ _spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status) _spdk_bdev_set_qos_limit_done(ctx, status); } -void -spdk_bdev_set_qos_limit_iops(struct spdk_bdev *bdev, uint64_t ios_per_sec, - void (*cb_fn)(void *cb_arg, int status), void *cb_arg) +static void +_spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, 
uint64_t *limits) { - struct set_qos_limit_ctx *ctx; + int i; - if (ios_per_sec > 0 && ios_per_sec % SPDK_BDEV_QOS_MIN_IOS_PER_SEC) { - SPDK_ERRLOG("Requested ios_per_sec limit %" PRIu64 " is not a multiple of %u\n", - ios_per_sec, SPDK_BDEV_QOS_MIN_IOS_PER_SEC); - cb_fn(cb_arg, -EINVAL); - return; + assert(bdev->internal.qos != NULL); + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + bdev->internal.qos->rate_limits[i].limit = limits[i]; + + if (limits[i] == 0) { + bdev->internal.qos->rate_limits[i].limit = + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + } + } + } +} + +void +spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits, + void (*cb_fn)(void *cb_arg, int status), void *cb_arg) +{ + struct set_qos_limit_ctx *ctx; + uint32_t limit_set_complement; + uint64_t min_limit_per_sec; + int i; + bool disable_rate_limit = true; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + if (limits[i] > 0) { + disable_rate_limit = false; + } + + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC; + } else { + /* Change from megabyte to byte rate limit */ + limits[i] = limits[i] * 1024 * 1024; + min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC; + } + + limit_set_complement = limits[i] % min_limit_per_sec; + if (limit_set_complement) { + SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n", + limits[i], min_limit_per_sec); + limits[i] += min_limit_per_sec - limit_set_complement; + SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]); + } } ctx = calloc(1, sizeof(*ctx)); @@ -3663,7 +3809,19 @@ spdk_bdev_set_qos_limit_iops(struct spdk_bdev *bdev, uint64_t ios_per_sec, } bdev->internal.qos_mod_in_progress = true; - if (ios_per_sec > 0) { + if (disable_rate_limit == true && bdev->internal.qos) { + for (i = 0; i < 
SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED && + (bdev->internal.qos->rate_limits[i].limit > 0 && + bdev->internal.qos->rate_limits[i].limit != + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) { + disable_rate_limit = false; + break; + } + } + } + + if (disable_rate_limit == false) { if (bdev->internal.qos == NULL) { /* Enabling */ bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos)); @@ -3675,17 +3833,22 @@ spdk_bdev_set_qos_limit_iops(struct spdk_bdev *bdev, uint64_t ios_per_sec, return; } - bdev->internal.qos->iops_rate_limit = ios_per_sec; + _spdk_bdev_set_qos_rate_limits(bdev, limits); + spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_enable_qos_msg, ctx, _spdk_bdev_enable_qos_done); } else { /* Updating */ - bdev->internal.qos->iops_rate_limit = ios_per_sec; - spdk_thread_send_msg(bdev->internal.qos->thread, _spdk_bdev_update_qos_limit_iops_msg, ctx); + _spdk_bdev_set_qos_rate_limits(bdev, limits); + + spdk_thread_send_msg(bdev->internal.qos->thread, + _spdk_bdev_update_qos_rate_limit_msg, ctx); } } else { if (bdev->internal.qos != NULL) { + _spdk_bdev_set_qos_rate_limits(bdev, limits); + /* Disabling */ spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_disable_qos_msg, ctx, diff --git a/lib/bdev/rpc/bdev_rpc.c b/lib/bdev/rpc/bdev_rpc.c index 8d31348a3..0e2156c56 100644 --- a/lib/bdev/rpc/bdev_rpc.c +++ b/lib/bdev/rpc/bdev_rpc.c @@ -219,6 +219,7 @@ spdk_rpc_dump_bdev_info(struct spdk_json_write_ctx *w, struct spdk_bdev *bdev) { struct spdk_bdev_alias *tmp; + uint64_t qos_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES]; spdk_json_write_object_begin(w); @@ -250,8 +251,9 @@ spdk_rpc_dump_bdev_info(struct spdk_json_write_ctx *w, spdk_json_write_named_string(w, "uuid", uuid_str); } - spdk_json_write_name(w, "qos_ios_per_sec"); - spdk_json_write_uint64(w, spdk_bdev_get_qos_ios_per_sec(bdev)); + spdk_bdev_get_qos_rate_limits(bdev, qos_limits); + spdk_json_write_name(w, 
spdk_bdev_get_qos_rpc_type(SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT)); + spdk_json_write_uint64(w, qos_limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT]); spdk_json_write_name(w, "claimed"); spdk_json_write_bool(w, (bdev->internal.claim_module != NULL)); @@ -524,8 +526,9 @@ static void spdk_rpc_set_bdev_qos_limit_iops(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) { - struct rpc_set_bdev_qos_limit_iops req = {}; + struct rpc_set_bdev_qos_limit_iops req = {NULL, UINT64_MAX}; struct spdk_bdev *bdev; + uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {UINT64_MAX, UINT64_MAX}; if (spdk_json_decode_object(params, rpc_set_bdev_qos_limit_iops_decoders, SPDK_COUNTOF(rpc_set_bdev_qos_limit_iops_decoders), @@ -537,16 +540,27 @@ spdk_rpc_set_bdev_qos_limit_iops(struct spdk_jsonrpc_request *request, bdev = spdk_bdev_get_by_name(req.name); if (bdev == NULL) { SPDK_ERRLOG("bdev '%s' does not exist\n", req.name); - goto invalid; + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Bdev does not exist"); + goto exit; + } + + if (req.ios_per_sec == UINT64_MAX) { + SPDK_ERRLOG("invalid rate limits set\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid rate limits"); + goto exit; } free_rpc_set_bdev_qos_limit_iops(&req); - spdk_bdev_set_qos_limit_iops(bdev, req.ios_per_sec, - spdk_rpc_set_bdev_qos_limit_iops_complete, request); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = req.ios_per_sec; + spdk_bdev_set_qos_rate_limits(bdev, limits, spdk_rpc_set_bdev_qos_limit_iops_complete, + request); return; invalid: spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +exit: free_rpc_set_bdev_qos_limit_iops(&req); } diff --git a/test/bdev/bdev.conf.in b/test/bdev/bdev.conf.in index bb1d733f8..c45810788 100644 --- a/test/bdev/bdev.conf.in +++ b/test/bdev/bdev.conf.in @@ -27,7 +27,7 @@ [QoS] # QoS section defines limitation on performance - # metric like IOPS + # 
metric like IOPS and bandwidth # # Format: Limit_IOPS Bdev_Name IOPS_Limit_Value # @@ -36,6 +36,12 @@ # # Assign 20000 IOPS for the Malloc0 block device Limit_IOPS Malloc0 20000 + # + # Bandwidth limit is in MB per second; it must be 10 or + # greater and a multiple of 10 + # Assign 100 (MB) bandwidth for the Malloc3 block + # device + Limit_BPS Malloc3 100 [RAID0] Name raid0 diff --git a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c index 4d4b455e8..92c6cfa77 100644 --- a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c @@ -620,8 +620,10 @@ basic_qos(void) * Enable both IOPS and bandwidth rate limits. * In this case, both rate limits will take equal effect. */ - bdev->internal.qos->iops_rate_limit = 2000; /* 2 I/O per millisecond */ - bdev->internal.qos->byte_rate_limit = 8192000; /* 8K byte per millisecond with 4K block size */ + /* 2000 I/O per second, or 2 per millisecond */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT].limit = 2000; + /* 8K byte per millisecond with 4K block size */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT].limit = 8192000; g_get_io_channel = true; @@ -728,8 +730,10 @@ io_during_qos_queue(void) * Enable both IOPS and bandwidth rate limits. * In this case, IOPS rate limit will take effect first. */ - bdev->internal.qos->iops_rate_limit = 1000; /* 1000 I/O per second, or 1 per millisecond */ - bdev->internal.qos->byte_rate_limit = 8192000; /* 8K byte per millisecond with 4K block size */ + /* 1000 I/O per second, or 1 per millisecond */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT].limit = 1000; + /* 8K byte per millisecond with 4K block size */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT].limit = 8192000; g_get_io_channel = true; @@ -816,8 +820,10 @@ io_during_qos_reset(void) * Enable both IOPS and bandwidth rate limits. * In this case, bandwidth rate limit will take effect first. 
*/ - bdev->internal.qos->iops_rate_limit = 2000; /* 2000 I/O per second, or 2 per millisecond */ - bdev->internal.qos->byte_rate_limit = 4096000; /* 4K byte per millisecond with 4K block size */ + /* 2000 I/O per second, or 2 per millisecond */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT].limit = 2000; + /* 4K byte per millisecond with 4K block size */ + bdev->internal.qos->rate_limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT].limit = 4096000; g_get_io_channel = true; @@ -1147,11 +1153,16 @@ qos_dynamic_enable(void) struct spdk_bdev_channel *bdev_ch[2]; struct spdk_bdev *bdev; enum spdk_bdev_io_status bdev_io_status[2]; + uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {}; int status, second_status, rc, i; setup_test(); reset_time(); + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + limits[i] = UINT64_MAX; + } + bdev = &g_bdev.bdev; g_get_io_channel = true; @@ -1169,9 +1180,14 @@ qos_dynamic_enable(void) set_thread(0); - /* Enable QoS */ + /* + * Enable QoS: IOPS and byte per second rate limits. + * More than 10 I/Os allowed per timeslice. 
+ */ status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 10000, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000; + limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT] = 100; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); poll_threads(); CU_ASSERT(status == 0); CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) != 0); @@ -1209,9 +1225,19 @@ qos_dynamic_enable(void) CU_ASSERT(bdev_io_status[1] == SPDK_BDEV_IO_STATUS_PENDING); poll_threads(); - /* Disable QoS */ + /* Disable QoS: IOPS rate limit */ status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 0, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 0; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); + poll_threads(); + CU_ASSERT(status == 0); + CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) != 0); + CU_ASSERT((bdev_ch[1]->flags & BDEV_CH_QOS_ENABLED) != 0); + + /* Disable QoS: Byte per second rate limit */ + status = -1; + limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT] = 0; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); poll_threads(); CU_ASSERT(status == 0); CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) == 0); @@ -1235,7 +1261,8 @@ qos_dynamic_enable(void) /* Disable QoS again */ status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 0, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 0; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); poll_threads(); CU_ASSERT(status == 0); /* This should succeed */ CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) == 0); @@ -1243,7 +1270,8 @@ qos_dynamic_enable(void) /* Enable QoS on thread 0 */ status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 10000, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); poll_threads(); CU_ASSERT(status == 0); 
CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) != 0); @@ -1252,7 +1280,8 @@ qos_dynamic_enable(void) /* Disable QoS on thread 1 */ set_thread(1); status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 0, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 0; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); /* Don't poll yet. This should leave the channels with QoS enabled */ CU_ASSERT(status == -1); CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) != 0); @@ -1260,7 +1289,8 @@ qos_dynamic_enable(void) /* Enable QoS. This should immediately fail because the previous disable QoS hasn't completed. */ second_status = 0; - spdk_bdev_set_qos_limit_iops(bdev, 10000, qos_dynamic_enable_done, &second_status); + limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT] = 10; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &second_status); poll_threads(); CU_ASSERT(status == 0); /* The disable should succeed */ CU_ASSERT(second_status < 0); /* The enable should fail */ @@ -1269,7 +1299,8 @@ qos_dynamic_enable(void) /* Enable QoS on thread 1. This should succeed now that the disable has completed. */ status = -1; - spdk_bdev_set_qos_limit_iops(bdev, 10000, qos_dynamic_enable_done, &status); + limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000; + spdk_bdev_set_qos_rate_limits(bdev, limits, qos_dynamic_enable_done, &status); poll_threads(); CU_ASSERT(status == 0); CU_ASSERT((bdev_ch[0]->flags & BDEV_CH_QOS_ENABLED) != 0);