diff --git a/CHANGELOG.md b/CHANGELOG.md index 153e94580..3aad89732 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,10 @@ Changed `bdev_raid_get_bdevs` RPC output format to include raid_bdev details. Added `selector` parameter to bdev_nvme_set_multipath_policy RPC to set path selector for multipath. Option `round_robin` and `queue_depth` are available. +Added `rr_min_io` option to RPC bdev_nvme_set_multipath_policy. It switches I/O to +another path after rr_min_io I/Os are routed to current io path for the round-robin +path selector. + ### bdevperf Promoted the application to example to match similar programs: fio_plugin and perf. diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 4b16c88db..84cf13f98 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -4145,6 +4145,7 @@ Name | Optional | Type | Description name | Required | string | Name of the NVMe bdev policy | Required | string | Multipath policy: active_active or active_passive selector | Optional | string | Multipath selector: round_robin or queue_depth, used in active-active mode. Default is round_robin +rr_min_io | Optional | number | Number of I/Os routed to current io path before switching to another for round-robin selector. The min value is 1. 
#### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 16fc3fbcc..75bb534e9 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -557,6 +557,7 @@ static void bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch) { nbdev_ch->current_io_path = NULL; + nbdev_ch->rr_counter = 0; } static struct nvme_io_path * @@ -662,6 +663,7 @@ bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf) nbdev_ch->mp_policy = nbdev->mp_policy; nbdev_ch->mp_selector = nbdev->mp_selector; + nbdev_ch->rr_min_io = nbdev->rr_min_io; TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) { rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns); @@ -928,9 +930,15 @@ _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch) static inline struct nvme_io_path * bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch) { - if (spdk_likely(nbdev_ch->current_io_path != NULL && - nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE)) { - return nbdev_ch->current_io_path; + if (spdk_likely(nbdev_ch->current_io_path != NULL)) { + if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) { + return nbdev_ch->current_io_path; + } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) { + if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) { + return nbdev_ch->current_io_path; + } + nbdev_ch->rr_counter = 0; + } } if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE || @@ -3359,6 +3367,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) bdev->ref = 1; bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE; bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN; + bdev->rr_min_io = UINT32_MAX; TAILQ_INIT(&bdev->nvme_ns_list); TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq); bdev->opal = nvme_ctrlr->opal_dev != NULL; @@ -4169,6 +4178,7 @@ _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i) nbdev_ch->mp_policy = nbdev->mp_policy; nbdev_ch->mp_selector = 
nbdev->mp_selector; + nbdev_ch->rr_min_io = nbdev->rr_min_io; bdev_nvme_clear_current_io_path(nbdev_ch); spdk_for_each_channel_continue(i, 0); @@ -4176,7 +4186,8 @@ _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i) void bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy, - enum bdev_nvme_multipath_selector selector, bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg) + enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io, + bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg) { struct bdev_nvme_set_multipath_policy_ctx *ctx; struct spdk_bdev *bdev; @@ -4185,11 +4196,23 @@ bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy assert(cb_fn != NULL); + if (policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE && selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) { + if (rr_min_io == UINT32_MAX) { + rr_min_io = 1; + } else if (rr_min_io == 0) { + rc = -EINVAL; + goto exit; + } + } else if (rr_min_io != UINT32_MAX) { + rc = -EINVAL; + goto exit; + } + ctx = calloc(1, sizeof(*ctx)); if (ctx == NULL) { SPDK_ERRLOG("Failed to alloc context.\n"); rc = -ENOMEM; - goto err_alloc; + goto exit; } ctx->cb_fn = cb_fn; @@ -4213,6 +4236,7 @@ bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy pthread_mutex_lock(&nbdev->mutex); nbdev->mp_policy = policy; nbdev->mp_selector = selector; + nbdev->rr_min_io = rr_min_io; pthread_mutex_unlock(&nbdev->mutex); spdk_for_each_channel(nbdev, @@ -4225,7 +4249,7 @@ err_module: spdk_bdev_close(ctx->desc); err_open: free(ctx); -err_alloc: +exit: cb_fn(cb_arg, rc); } diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index 2798ab7c7..09ac1bb7a 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -164,6 +164,7 @@ struct nvme_bdev { int ref; enum bdev_nvme_multipath_policy mp_policy; enum bdev_nvme_multipath_selector mp_selector; + uint32_t rr_min_io; TAILQ_HEAD(, nvme_ns) nvme_ns_list; bool 
opal; TAILQ_ENTRY(nvme_bdev) tailq; @@ -203,6 +204,8 @@ struct nvme_bdev_channel { struct nvme_io_path *current_io_path; enum bdev_nvme_multipath_policy mp_policy; enum bdev_nvme_multipath_selector mp_selector; + uint32_t rr_min_io; + uint32_t rr_counter; STAILQ_HEAD(, nvme_io_path) io_path_list; TAILQ_HEAD(retry_io_head, spdk_bdev_io) retry_io_list; struct spdk_poller *retry_io_poller; @@ -353,11 +356,13 @@ typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc); * \param name NVMe bdev name * \param policy Multipath policy (active-passive or active-active) * \param selector Multipath selector (round_robin, queue_depth) + * \param rr_min_io Number of IO to route to a path before switching to another for round-robin * \param cb_fn Function to be called back after completion. */ void bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy, enum bdev_nvme_multipath_selector selector, + uint32_t rr_min_io, bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg); diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index f443c4b07..7481cadae 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -2210,6 +2210,7 @@ struct rpc_set_multipath_policy { char *name; enum bdev_nvme_multipath_policy policy; enum bdev_nvme_multipath_selector selector; + uint32_t rr_min_io; }; static void @@ -2256,6 +2257,7 @@ static const struct spdk_json_object_decoder rpc_set_multipath_policy_decoders[] {"name", offsetof(struct rpc_set_multipath_policy, name), spdk_json_decode_string}, {"policy", offsetof(struct rpc_set_multipath_policy, policy), rpc_decode_mp_policy}, {"selector", offsetof(struct rpc_set_multipath_policy, selector), rpc_decode_mp_selector, true}, + {"rr_min_io", offsetof(struct rpc_set_multipath_policy, rr_min_io), spdk_json_decode_uint32, true}, }; struct rpc_set_multipath_policy_ctx { @@ -2290,6 +2292,8 @@ rpc_bdev_nvme_set_multipath_policy(struct spdk_jsonrpc_request 
*request, return; } + ctx->req.rr_min_io = UINT32_MAX; + if (spdk_json_decode_object(params, rpc_set_multipath_policy_decoders, SPDK_COUNTOF(rpc_set_multipath_policy_decoders), &ctx->req)) { @@ -2309,6 +2313,7 @@ rpc_bdev_nvme_set_multipath_policy(struct spdk_jsonrpc_request *request, } bdev_nvme_set_multipath_policy(ctx->req.name, ctx->req.policy, ctx->req.selector, + ctx->req.rr_min_io, rpc_bdev_nvme_set_multipath_policy_done, ctx); return; diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index d8f0a2ed6..e95d82c18 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -943,19 +943,22 @@ def bdev_nvme_set_preferred_path(client, name, cntlid): return client.call('bdev_nvme_set_preferred_path', params) -def bdev_nvme_set_multipath_policy(client, name, policy, selector): +def bdev_nvme_set_multipath_policy(client, name, policy, selector, rr_min_io): """Set multipath policy of the NVMe bdev Args: name: NVMe bdev name policy: Multipath policy (active_passive or active_active) selector: Multipath selector (round_robin, queue_depth) + rr_min_io: Number of IO to route to a path before switching to another one (optional) """ params = {'name': name, 'policy': policy} if selector: params['selector'] = selector + if rr_min_io: + params['rr_min_io'] = rr_min_io return client.call('bdev_nvme_set_multipath_policy', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index 7a43a5288..c7c5402bb 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -861,13 +861,15 @@ if __name__ == "__main__": rpc.bdev.bdev_nvme_set_multipath_policy(args.client, name=args.name, policy=args.policy, - selector=args.selector) + selector=args.selector, + rr_min_io=args.rr_min_io) p = subparsers.add_parser('bdev_nvme_set_multipath_policy', help="""Set multipath policy of the NVMe bdev""") p.add_argument('-b', '--name', help='Name of the NVMe bdev', required=True) p.add_argument('-p', '--policy', help='Multipath policy (active_passive or active_active)', required=True) 
p.add_argument('-s', '--selector', help='Multipath selector (round_robin, queue_depth)', required=False) + p.add_argument('-r', '--rr-min-io', help='Number of IO to route to a path before switching to another for round-robin', type=int, required=False) p.set_defaults(func=bdev_nvme_set_multipath_policy) def bdev_nvme_cuse_register(args): diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 3ebb2937c..cf924a357 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -5898,6 +5898,22 @@ test_find_next_io_path(void) nvme_ns2.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE; nvme_ns3.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE; CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2); + + /* Test if next io_path is selected according to rr_min_io */ + + nbdev_ch.current_io_path = NULL; + nbdev_ch.rr_min_io = 2; + nbdev_ch.rr_counter = 0; + nvme_ns1.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE; + nvme_ns2.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE; + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1); + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1); + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2); + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2); + + nvme_ns3.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE; + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1); + CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1); } static void @@ -6177,13 +6193,14 @@ test_set_multipath_policy(void) */ done = -1; bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE, - BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH, + BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH, UINT32_MAX, ut_set_multipath_policy_done, &done); poll_threads(); CU_ASSERT(done == 0); CU_ASSERT(bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE); CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH); + CU_ASSERT(bdev->rr_min_io == 
UINT32_MAX); ch = spdk_get_io_channel(bdev); SPDK_CU_ASSERT_FATAL(ch != NULL); @@ -6191,13 +6208,14 @@ test_set_multipath_policy(void) CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE); CU_ASSERT(nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH); + CU_ASSERT(nbdev_ch->rr_min_io == UINT32_MAX); /* If multipath policy is updated while a I/O channel is active, * the update should be applied to the I/O channel immediately. */ done = -1; bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE, - BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, + BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, UINT32_MAX, ut_set_multipath_policy_done, &done); poll_threads(); CU_ASSERT(done == 0); @@ -6206,6 +6224,8 @@ test_set_multipath_policy(void) CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE); CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN); CU_ASSERT(nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN); + CU_ASSERT(bdev->rr_min_io == UINT32_MAX); + CU_ASSERT(nbdev_ch->rr_min_io == UINT32_MAX); spdk_put_io_channel(ch); @@ -6325,12 +6345,13 @@ test_retry_io_to_same_path(void) done = -1; bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE, - BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, ut_set_multipath_policy_done, &done); + BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, 1, ut_set_multipath_policy_done, &done); poll_threads(); CU_ASSERT(done == 0); CU_ASSERT(bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE); CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN); + CU_ASSERT(bdev->rr_min_io == 1); ch = spdk_get_io_channel(bdev); SPDK_CU_ASSERT_FATAL(ch != NULL); @@ -6338,6 +6359,7 @@ test_retry_io_to_same_path(void) CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE); CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN); + CU_ASSERT(nbdev_ch->rr_min_io == 1); bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, ch); ut_bdev_io_set_buf(bdev_io);