bdev/nvme: support switch to another io path after a number of IOs

Support specifying rr_min_io for the multipath round-robin policy,
which makes I/O switch to another I/O path after rr_min_io I/Os have
been routed to the current I/O path.

Change-Id: I09f0d8d24271c0178ff816fa63ce8576b6e8ae47
Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15445
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Authored by Richael Zhuang on 2022-10-31 15:13:30 +08:00; committed by Jim Harris
parent 6fa3497b69
commit 2f500a23fb
8 changed files with 77 additions and 11 deletions
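
To illustrate the behavior the commit message describes, here is a minimal Python sketch (not SPDK code; class and attribute names are hypothetical) of the round-robin selector with the new per-channel counter. With rr_min_io = 2 and two usable paths, I/Os land on path 1, path 1, path 2, path 2, and so on, which is exactly what the unit test added at the end of this change asserts.

```python
# Minimal model of the new path selection (illustrative only, not the SPDK
# implementation).
class NbdevChannel:
    def __init__(self, paths, rr_min_io):
        self.paths = paths            # usable I/O paths, in round-robin order
        self.rr_min_io = rr_min_io    # I/Os to route to a path before switching
        self.current_io_path = None
        self.rr_counter = 0

    def find_io_path(self):
        if self.current_io_path is not None:
            self.rr_counter += 1
            if self.rr_counter < self.rr_min_io:
                return self.current_io_path   # keep using the cached path
            self.rr_counter = 0               # quota reached, move to the next path
        if self.current_io_path is None:
            next_idx = 0
        else:
            next_idx = (self.paths.index(self.current_io_path) + 1) % len(self.paths)
        self.current_io_path = self.paths[next_idx]
        return self.current_io_path


ch = NbdevChannel(paths=["path1", "path2"], rr_min_io=2)
print([ch.find_io_path() for _ in range(6)])
# ['path1', 'path1', 'path2', 'path2', 'path1', 'path1']
```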


@ -94,6 +94,10 @@ Changed `bdev_raid_get_bdevs` RPC output format to include raid_bdev details.
Added `selector` parameter to bdev_nvme_set_multipath_policy RPC to set path selector for multipath.
Options `round_robin` and `queue_depth` are available.
Added `rr_min_io` option to the bdev_nvme_set_multipath_policy RPC. For the round-robin
path selector, it switches I/O to another path after `rr_min_io` I/Os have been routed
to the current I/O path.
### bdevperf
Promoted the application to an example, to match similar programs: fio_plugin and perf.
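
Circling back to the `rr_min_io` entry above, a hedged usage sketch with SPDK's Python RPC helpers follows; the import paths, socket path, and bdev name are illustrative and may differ between releases.

```python
# Sketch: enable active_active with the round-robin selector and switch paths
# every 4 I/Os. Assumes the SPDK python package is on PYTHONPATH.
from spdk.rpc.client import JSONRPCClient
from spdk.rpc.bdev import bdev_nvme_set_multipath_policy

client = JSONRPCClient("/var/tmp/spdk.sock")   # default RPC socket (illustrative)
bdev_nvme_set_multipath_policy(client,
                               name="Nvme0n1",           # example bdev name
                               policy="active_active",
                               selector="round_robin",
                               rr_min_io=4)
```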


@ -4145,6 +4145,7 @@ Name | Optional | Type | Description
name | Required | string | Name of the NVMe bdev
policy | Required | string | Multipath policy: active_active or active_passive
selector | Optional | string | Multipath selector: round_robin or queue_depth, used in active-active mode. Default is round_robin
rr_min_io | Optional | number | Number of I/Os routed to the current I/O path before switching to another path, for the round-robin selector. The minimum value is 1.
#### Example
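
The example body in doc/jsonrpc.md is not reproduced here. As a hedged sketch, these are the parameter combinations the target accepts and rejects for the new option, per the validation added to bdev_nvme.c later in this change (the bdev name is illustrative):

```python
# Accepted: rr_min_io together with active_active + round_robin.
ok = {"name": "Nvme0n1", "policy": "active_active",
      "selector": "round_robin", "rr_min_io": 8}

# Accepted: rr_min_io omitted with round_robin; the target defaults it to 1.
ok_default = {"name": "Nvme0n1", "policy": "active_active", "selector": "round_robin"}

# Rejected (-EINVAL): rr_min_io of 0, or rr_min_io with any other policy/selector.
bad_zero = {"name": "Nvme0n1", "policy": "active_active",
            "selector": "round_robin", "rr_min_io": 0}
bad_combo = {"name": "Nvme0n1", "policy": "active_passive", "rr_min_io": 8}

# e.g. client.call("bdev_nvme_set_multipath_policy", ok)
```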


@ -557,6 +557,7 @@ static void
bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
{
nbdev_ch->current_io_path = NULL;
nbdev_ch->rr_counter = 0;
}
static struct nvme_io_path *
@ -662,6 +663,7 @@ bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
nbdev_ch->mp_policy = nbdev->mp_policy;
nbdev_ch->mp_selector = nbdev->mp_selector;
nbdev_ch->rr_min_io = nbdev->rr_min_io;
TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
@ -928,9 +930,15 @@ _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
static inline struct nvme_io_path *
bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
{
if (spdk_likely(nbdev_ch->current_io_path != NULL &&
nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE)) {
if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
return nbdev_ch->current_io_path;
} else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
return nbdev_ch->current_io_path;
}
nbdev_ch->rr_counter = 0;
}
}
if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
@ -3359,6 +3367,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
bdev->ref = 1;
bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
bdev->rr_min_io = UINT32_MAX;
TAILQ_INIT(&bdev->nvme_ns_list);
TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
bdev->opal = nvme_ctrlr->opal_dev != NULL;
@ -4169,6 +4178,7 @@ _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
nbdev_ch->mp_policy = nbdev->mp_policy;
nbdev_ch->mp_selector = nbdev->mp_selector;
nbdev_ch->rr_min_io = nbdev->rr_min_io;
bdev_nvme_clear_current_io_path(nbdev_ch);
spdk_for_each_channel_continue(i, 0);
@ -4176,7 +4186,8 @@ _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
void
bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
enum bdev_nvme_multipath_selector selector, bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
{
struct bdev_nvme_set_multipath_policy_ctx *ctx;
struct spdk_bdev *bdev;
@ -4185,11 +4196,23 @@ bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy
assert(cb_fn != NULL);
if (policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE && selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
if (rr_min_io == UINT32_MAX) {
rr_min_io = 1;
} else if (rr_min_io == 0) {
rc = -EINVAL;
goto exit;
}
} else if (rr_min_io != UINT32_MAX) {
rc = -EINVAL;
goto exit;
}
ctx = calloc(1, sizeof(*ctx));
if (ctx == NULL) {
SPDK_ERRLOG("Failed to alloc context.\n");
rc = -ENOMEM;
goto err_alloc;
goto exit;
}
ctx->cb_fn = cb_fn;
@ -4213,6 +4236,7 @@ bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy
pthread_mutex_lock(&nbdev->mutex);
nbdev->mp_policy = policy;
nbdev->mp_selector = selector;
nbdev->rr_min_io = rr_min_io;
pthread_mutex_unlock(&nbdev->mutex);
spdk_for_each_channel(nbdev,
@ -4225,7 +4249,7 @@ err_module:
spdk_bdev_close(ctx->desc);
err_open:
free(ctx);
err_alloc:
exit:
cb_fn(cb_arg, rc);
}


@ -164,6 +164,7 @@ struct nvme_bdev {
int ref;
enum bdev_nvme_multipath_policy mp_policy;
enum bdev_nvme_multipath_selector mp_selector;
uint32_t rr_min_io;
TAILQ_HEAD(, nvme_ns) nvme_ns_list;
bool opal;
TAILQ_ENTRY(nvme_bdev) tailq;
@ -203,6 +204,8 @@ struct nvme_bdev_channel {
struct nvme_io_path *current_io_path;
enum bdev_nvme_multipath_policy mp_policy;
enum bdev_nvme_multipath_selector mp_selector;
uint32_t rr_min_io;
uint32_t rr_counter;
STAILQ_HEAD(, nvme_io_path) io_path_list;
TAILQ_HEAD(retry_io_head, spdk_bdev_io) retry_io_list;
struct spdk_poller *retry_io_poller;
@ -353,11 +356,13 @@ typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc);
* \param name NVMe bdev name
* \param policy Multipath policy (active-passive or active-active)
* \param selector Multipath selector (round_robin, queue_depth)
* \param rr_min_io Number of I/Os to route to a path before switching to another one, for the round-robin selector
* \param cb_fn Function to be called back after completion.
*/
void bdev_nvme_set_multipath_policy(const char *name,
enum bdev_nvme_multipath_policy policy,
enum bdev_nvme_multipath_selector selector,
uint32_t rr_min_io,
bdev_nvme_set_multipath_policy_cb cb_fn,
void *cb_arg);


@ -2210,6 +2210,7 @@ struct rpc_set_multipath_policy {
char *name;
enum bdev_nvme_multipath_policy policy;
enum bdev_nvme_multipath_selector selector;
uint32_t rr_min_io;
};
static void
@ -2256,6 +2257,7 @@ static const struct spdk_json_object_decoder rpc_set_multipath_policy_decoders[]
{"name", offsetof(struct rpc_set_multipath_policy, name), spdk_json_decode_string},
{"policy", offsetof(struct rpc_set_multipath_policy, policy), rpc_decode_mp_policy},
{"selector", offsetof(struct rpc_set_multipath_policy, selector), rpc_decode_mp_selector, true},
{"rr_min_io", offsetof(struct rpc_set_multipath_policy, rr_min_io), spdk_json_decode_uint32, true},
};
struct rpc_set_multipath_policy_ctx {
@ -2290,6 +2292,8 @@ rpc_bdev_nvme_set_multipath_policy(struct spdk_jsonrpc_request *request,
return;
}
ctx->req.rr_min_io = UINT32_MAX;
if (spdk_json_decode_object(params, rpc_set_multipath_policy_decoders,
SPDK_COUNTOF(rpc_set_multipath_policy_decoders),
&ctx->req)) {
@ -2309,6 +2313,7 @@ rpc_bdev_nvme_set_multipath_policy(struct spdk_jsonrpc_request *request,
}
bdev_nvme_set_multipath_policy(ctx->req.name, ctx->req.policy, ctx->req.selector,
ctx->req.rr_min_io,
rpc_bdev_nvme_set_multipath_policy_done, ctx);
return;


@ -943,19 +943,22 @@ def bdev_nvme_set_preferred_path(client, name, cntlid):
return client.call('bdev_nvme_set_preferred_path', params)
def bdev_nvme_set_multipath_policy(client, name, policy, selector):
def bdev_nvme_set_multipath_policy(client, name, policy, selector, rr_min_io):
"""Set multipath policy of the NVMe bdev
Args:
name: NVMe bdev name
policy: Multipath policy (active_passive or active_active)
selector: Multipath selector (round_robin, queue_depth)
rr_min_io: Number of I/Os to route to a path before switching to another one (optional)
"""
params = {'name': name,
'policy': policy}
if selector:
params['selector'] = selector
if rr_min_io:
params['rr_min_io'] = rr_min_io
return client.call('bdev_nvme_set_multipath_policy', params)
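
One detail worth noting about the wrapper above: `rr_min_io` is only forwarded when it is truthy, so a caller that does not want to set it can pass `None` and the target applies its default (1 for the round-robin selector). A hedged sketch, assuming `client` is an existing JSONRPCClient and the bdev name is illustrative:

```python
# rr_min_io=None is dropped from the request; the target then defaults it.
bdev_nvme_set_multipath_policy(client,
                               name="Nvme0n1",
                               policy="active_active",
                               selector="round_robin",
                               rr_min_io=None)
```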


@ -861,13 +861,15 @@ if __name__ == "__main__":
rpc.bdev.bdev_nvme_set_multipath_policy(args.client,
name=args.name,
policy=args.policy,
selector=args.selector)
selector=args.selector,
rr_min_io=args.rr_min_io)
p = subparsers.add_parser('bdev_nvme_set_multipath_policy',
help="""Set multipath policy of the NVMe bdev""")
p.add_argument('-b', '--name', help='Name of the NVMe bdev', required=True)
p.add_argument('-p', '--policy', help='Multipath policy (active_passive or active_active)', required=True)
p.add_argument('-s', '--selector', help='Multipath selector (round_robin, queue_depth)', required=False)
p.add_argument('-r', '--rr-min-io', help='Number of I/Os to route to a path before switching to another for round-robin', type=int, required=False)
p.set_defaults(func=bdev_nvme_set_multipath_policy)
def bdev_nvme_cuse_register(args):


@ -5898,6 +5898,22 @@ test_find_next_io_path(void)
nvme_ns2.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE;
nvme_ns3.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE;
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2);
/* Test if next io_path is selected according to rr_min_io */
nbdev_ch.current_io_path = NULL;
nbdev_ch.rr_min_io = 2;
nbdev_ch.rr_counter = 0;
nvme_ns1.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
nvme_ns2.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1);
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1);
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2);
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path2);
nvme_ns3.ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE;
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1);
CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1);
}
static void
@ -6177,13 +6193,14 @@ test_set_multipath_policy(void)
*/
done = -1;
bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH,
BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH, UINT32_MAX,
ut_set_multipath_policy_done, &done);
poll_threads();
CU_ASSERT(done == 0);
CU_ASSERT(bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH);
CU_ASSERT(bdev->rr_min_io == UINT32_MAX);
ch = spdk_get_io_channel(bdev);
SPDK_CU_ASSERT_FATAL(ch != NULL);
@ -6191,13 +6208,14 @@ test_set_multipath_policy(void)
CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
CU_ASSERT(nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH);
CU_ASSERT(nbdev_ch->rr_min_io == UINT32_MAX);
/* If multipath policy is updated while an I/O channel is active,
* the update should be applied to the I/O channel immediately.
*/
done = -1;
bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE,
BDEV_NVME_MP_SELECTOR_ROUND_ROBIN,
BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, UINT32_MAX,
ut_set_multipath_policy_done, &done);
poll_threads();
CU_ASSERT(done == 0);
@ -6206,6 +6224,8 @@ test_set_multipath_policy(void)
CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE);
CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN);
CU_ASSERT(nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN);
CU_ASSERT(bdev->rr_min_io == UINT32_MAX);
CU_ASSERT(nbdev_ch->rr_min_io == UINT32_MAX);
spdk_put_io_channel(ch);
@ -6325,12 +6345,13 @@ test_retry_io_to_same_path(void)
done = -1;
bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, ut_set_multipath_policy_done, &done);
BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, 1, ut_set_multipath_policy_done, &done);
poll_threads();
CU_ASSERT(done == 0);
CU_ASSERT(bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN);
CU_ASSERT(bdev->rr_min_io == 1);
ch = spdk_get_io_channel(bdev);
SPDK_CU_ASSERT_FATAL(ch != NULL);
@ -6338,6 +6359,7 @@ test_retry_io_to_same_path(void)
CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
CU_ASSERT(bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN);
CU_ASSERT(nbdev_ch->rr_min_io == 1);
bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, ch);
ut_bdev_io_set_buf(bdev_io);