diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 9ff35bcbb..282c693d2 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -2956,9 +2956,10 @@ multipathing. This is done by specifying the `name` parameter as an existing con path, the hostnqn, hostsvcid, hostaddr, prchk_reftag, and prchk_guard_arguments must not be specified and are assumed to have the same value as the existing path. -The parameters, `ctrlr_loss_timeout_sec` and `reconnect_delay_sec`, are mutually dependent. +The parameters, `ctrlr_loss_timeout_sec`, `reconnect_delay_sec`, and `fast_io_fail_timeout_sec`, are mutually dependent. If `reconnect_delay_sec` is non-zero, `ctrlr_loss_timeout_sec` has to be -1 or not less than `reconnect_delay_sec`. If `reconnect_delay_sec` is zero, `ctrlr_loss_timeout_sec` has to be zero. +If `fast_io_fail_timeout_sec` is not zero, it has to be not less than `reconnect_delay_sec` and not more than `ctrlr_loss_timeout_sec` if `ctrlr_loss_timeout_sec` is not -1. #### Result @@ -2986,6 +2987,7 @@ multipath | Optional | string | Multipathing behavior: dis num_io_queues | Optional | uint32_t | The number of IO queues to request during initialization. Range: (0, UINT16_MAX + 1], Default is 1024. ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect. reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect. +fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout.
#### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 461b57da6..4b112cf90 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -737,6 +737,10 @@ nvme_io_path_is_failed(struct nvme_io_path *io_path) return true; } + if (nvme_ctrlr->fast_io_fail_timedout) { + return true; + } + if (nvme_ctrlr->resetting) { if (nvme_ctrlr->reconnect_delay_sec != 0) { return false; @@ -1312,6 +1316,23 @@ bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr) } } +static bool +bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr) +{ + uint32_t elapsed; + + if (nvme_ctrlr->fast_io_fail_timeout_sec == 0) { + return false; + } + + elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz(); + if (elapsed >= nvme_ctrlr->fast_io_fail_timeout_sec) { + return true; + } else { + return false; + } +} + enum bdev_nvme_op_after_reset { OP_NONE, OP_COMPLETE_PENDING_DESTRUCT, @@ -1333,6 +1354,9 @@ bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success) } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) { return OP_DESTRUCT; } else { + if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) { + nvme_ctrlr->fast_io_fail_timedout = true; + } bdev_nvme_failover_trid(nvme_ctrlr, false); return OP_DELAYED_RECONNECT; } @@ -3370,6 +3394,7 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, nvme_ctrlr->prchk_flags = ctx->prchk_flags; nvme_ctrlr->ctrlr_loss_timeout_sec = ctx->ctrlr_loss_timeout_sec; nvme_ctrlr->reconnect_delay_sec = ctx->reconnect_delay_sec; + nvme_ctrlr->fast_io_fail_timeout_sec = ctx->fast_io_fail_timeout_sec; } nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr, @@ -3837,7 +3862,8 @@ bdev_nvme_async_poll(void *arg) static bool bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec, - uint32_t reconnect_delay_sec) + uint32_t reconnect_delay_sec, + uint32_t fast_io_fail_timeout_sec) { if 
(ctrlr_loss_timeout_sec < -1) { SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n"); @@ -3846,6 +3872,10 @@ bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec, if (reconnect_delay_sec == 0) { SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n"); return false; + } else if (fast_io_fail_timeout_sec != 0 && + fast_io_fail_timeout_sec < reconnect_delay_sec) { + SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n"); + return false; } } else if (ctrlr_loss_timeout_sec != 0) { if (reconnect_delay_sec == 0) { @@ -3854,9 +3884,17 @@ bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec, } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) { SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n"); return false; + } else if (fast_io_fail_timeout_sec != 0) { + if (fast_io_fail_timeout_sec < reconnect_delay_sec) { + SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n"); + return false; + } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) { + SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n"); + return false; + } } - } else if (reconnect_delay_sec != 0) { - SPDK_ERRLOG("reconnect_delay_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n"); + } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) { + SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n"); return false; } @@ -3874,7 +3912,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr_opts *opts, bool multipath, int32_t ctrlr_loss_timeout_sec, - uint32_t reconnect_delay_sec) + uint32_t reconnect_delay_sec, + uint32_t fast_io_fail_timeout_sec) { struct nvme_probe_skip_entry *entry, *tmp; struct nvme_async_probe_ctx *ctx; @@ -3888,7 +3927,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, return -EEXIST; } - if 
(!bdev_nvme_check_multipath_params(ctrlr_loss_timeout_sec, reconnect_delay_sec)) { + if (!bdev_nvme_check_multipath_params(ctrlr_loss_timeout_sec, reconnect_delay_sec, + fast_io_fail_timeout_sec)) { return -EINVAL; } @@ -3905,6 +3945,7 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, ctx->trid = *trid; ctx->ctrlr_loss_timeout_sec = ctrlr_loss_timeout_sec; ctx->reconnect_delay_sec = reconnect_delay_sec; + ctx->fast_io_fail_timeout_sec = fast_io_fail_timeout_sec; if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) { @@ -4230,7 +4271,7 @@ discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl, snprintf(new_ctx->opts.hostnqn, sizeof(new_ctx->opts.hostnqn), "%s", ctx->hostnqn); rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0, 0, discovery_attach_controller_done, new_ctx, - &new_ctx->opts, true, 0, 0); + &new_ctx->opts, true, 0, 0, 0); if (rc == 0) { TAILQ_INSERT_TAIL(&ctx->ctrlr_ctxs, new_ctx, tailq); ctx->attach_in_progress++; @@ -5561,6 +5602,7 @@ nvme_ctrlr_config_json(struct spdk_json_write_ctx *w, (nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0); spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->ctrlr_loss_timeout_sec); spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->reconnect_delay_sec); + spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", nvme_ctrlr->fast_io_fail_timeout_sec); spdk_json_write_object_end(w); diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index e2b5ae446..2c4b11df2 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -60,6 +60,7 @@ struct nvme_async_probe_ctx { uint32_t prchk_flags; int32_t ctrlr_loss_timeout_sec; uint32_t reconnect_delay_sec; + uint32_t fast_io_fail_timeout_sec; struct spdk_poller *poller; struct spdk_nvme_transport_id trid; struct spdk_nvme_ctrlr_opts opts; @@ -110,6 +111,7 @@ struct nvme_ctrlr { uint32_t resetting : 
1; uint32_t reconnect_is_delayed : 1; + uint32_t fast_io_fail_timedout : 1; uint32_t destruct : 1; uint32_t ana_log_page_updating : 1; /** @@ -148,6 +150,7 @@ struct nvme_ctrlr { uint32_t reconnect_delay_sec; int32_t ctrlr_loss_timeout_sec; + uint32_t fast_io_fail_timeout_sec; pthread_mutex_t mutex; }; @@ -267,7 +270,8 @@ int bdev_nvme_create(struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr_opts *opts, bool multipath, int32_t ctrlr_loss_timeout_sec, - uint32_t reconnect_delay_sec); + uint32_t reconnect_delay_sec, + uint32_t fast_io_fail_timeout_sec); int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name, struct spdk_nvme_ctrlr_opts *opts, diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index 5c8647c03..3f17a1191 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -186,6 +186,7 @@ struct rpc_bdev_nvme_attach_controller { char *multipath; int32_t ctrlr_loss_timeout_sec; uint32_t reconnect_delay_sec; + uint32_t fast_io_fail_timeout_sec; struct spdk_nvme_ctrlr_opts opts; }; @@ -227,6 +228,7 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_attach_controller_dec {"num_io_queues", offsetof(struct rpc_bdev_nvme_attach_controller, opts.num_io_queues), spdk_json_decode_uint32, true}, {"ctrlr_loss_timeout_sec", offsetof(struct rpc_bdev_nvme_attach_controller, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true}, {"reconnect_delay_sec", offsetof(struct rpc_bdev_nvme_attach_controller, reconnect_delay_sec), spdk_json_decode_uint32, true}, + {"fast_io_fail_timeout_sec", offsetof(struct rpc_bdev_nvme_attach_controller, fast_io_fail_timeout_sec), spdk_json_decode_uint32, true}, }; #define NVME_MAX_BDEVS_PER_RPC 128 @@ -497,7 +499,7 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request, rc = bdev_nvme_create(&trid, ctx->req.name, ctx->names, ctx->count, prchk_flags, rpc_bdev_nvme_attach_controller_done, ctx, &ctx->req.opts, multipath, 
ctx->req.ctrlr_loss_timeout_sec, - ctx->req.reconnect_delay_sec); + ctx->req.reconnect_delay_sec, ctx->req.fast_io_fail_timeout_sec); if (rc) { spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); goto cleanup; diff --git a/scripts/rpc.py b/scripts/rpc.py index cfe01527e..1b52176c4 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -540,7 +540,8 @@ if __name__ == "__main__": multipath=args.multipath, num_io_queues=args.num_io_queues, ctrlr_loss_timeout_sec=args.ctrlr_loss_timeout_sec, - reconnect_delay_sec=args.reconnect_delay_sec)) + reconnect_delay_sec=args.reconnect_delay_sec, + fast_io_fail_timeout_sec=args.fast_io_fail_timeout_sec)) p = subparsers.add_parser('bdev_nvme_attach_controller', aliases=['construct_nvme_bdev'], help='Add bdevs with nvme backend') @@ -586,6 +587,12 @@ if __name__ == "__main__": If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and less than ctrlr_loss_timeout_sec.""", type=int) + p.add_argument('-u', '--fast-io-fail-timeout-sec', + help="""Time to wait until ctrlr is reconnected before failing I/O to ctrlr. + 0 means no such timeout. 
+ If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and + less than ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1.""", + type=int) p.set_defaults(func=bdev_nvme_attach_controller) def bdev_nvme_get_controllers(args): diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py index 711920507..e05104652 100644 --- a/scripts/rpc/bdev.py +++ b/scripts/rpc/bdev.py @@ -524,7 +524,8 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc priority=None, subnqn=None, hostnqn=None, hostaddr=None, hostsvcid=None, prchk_reftag=None, prchk_guard=None, hdgst=None, ddgst=None, fabrics_timeout=None, multipath=None, num_io_queues=None, - ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None): + ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None, + fast_io_fail_timeout_sec=None): """Construct block device for each NVMe namespace in the attached controller. Args: @@ -555,6 +556,10 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero. If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_sec has to be non-zero and less than ctrlr_loss_timeout_sec. (optional) + fast_io_fail_timeout_sec: Time to wait until ctrlr is reconnected before failing I/O to ctrlr. + 0 means no such timeout. + If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and less than + ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1. (optional) Returns: Names of created block devices.
@@ -611,6 +616,9 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc if reconnect_delay_sec is not None: params['reconnect_delay_sec'] = reconnect_delay_sec + if fast_io_fail_timeout_sec is not None: + params['fast_io_fail_timeout_sec'] = fast_io_fail_timeout_sec + return client.call('bdev_nvme_attach_controller', params) diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index d3fb96a2d..df737d3df 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -1670,7 +1670,7 @@ test_pending_reset(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1801,7 +1801,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1816,7 +1816,7 @@ test_attach_ctrlr(void) g_ut_attach_ctrlr_status = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1844,7 +1844,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1880,7 +1880,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); 
CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1934,7 +1934,7 @@ test_aer_cb(void) g_ut_attach_bdev_count = 3; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2140,7 +2140,7 @@ test_submit_nvme_cmd(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2232,7 +2232,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2247,7 +2247,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2277,7 +2277,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr3 != NULL); rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2324,7 +2324,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2339,7 +2339,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, 
NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2397,7 +2397,7 @@ test_abort(void) set_thread(1); rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, -1, 1); + attach_ctrlr_done, NULL, NULL, false, -1, 1, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2646,7 +2646,7 @@ test_bdev_unregister(void) g_ut_attach_bdev_count = 2; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2762,7 +2762,7 @@ test_init_ana_log_page(void) g_ut_attach_bdev_count = 5; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2863,7 +2863,7 @@ test_reconnect_qpair(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2995,7 +2995,7 @@ test_create_bdev_ctrlr(void) g_ut_attach_bdev_count = 0; rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3017,7 +3017,7 @@ test_create_bdev_ctrlr(void) ctrlr2->cdata.cntlid = ctrlr1->cdata.cntlid; rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3035,7 +3035,7 @@ test_create_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - 
attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3068,7 +3068,7 @@ test_create_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3078,7 +3078,7 @@ test_create_bdev_ctrlr(void) poll_threads(); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3168,7 +3168,7 @@ test_add_multi_ns_to_bdev(void) g_ut_attach_bdev_count = 3; rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3194,7 +3194,7 @@ test_add_multi_ns_to_bdev(void) g_ut_attach_bdev_count = 2; rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3271,7 +3271,7 @@ test_add_multi_ns_to_bdev(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3288,7 +3288,7 @@ test_add_multi_ns_to_bdev(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3374,7 +3374,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) 
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3389,7 +3389,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3437,7 +3437,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) memset(&ctrlr3->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3516,7 +3516,7 @@ test_admin_path(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3531,7 +3531,7 @@ test_admin_path(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3645,7 +3645,7 @@ test_reset_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3658,7 +3658,7 @@ test_reset_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", 
attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3947,7 +3947,7 @@ test_retry_io_if_ana_state_is_updating(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, -1, 1); + attach_ctrlr_done, NULL, NULL, false, -1, 1, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4074,7 +4074,7 @@ test_retry_io_for_io_path_error(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4169,7 +4169,7 @@ test_retry_io_for_io_path_error(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4277,7 +4277,7 @@ test_retry_io_count(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4463,7 +4463,7 @@ test_concurrent_read_ana_log_page(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4558,7 +4558,7 @@ test_retry_io_for_ana_error(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 
0); spdk_delay_us(1000); @@ -4704,7 +4704,7 @@ test_retry_admin_passthru_for_path_error(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4786,7 +4786,7 @@ test_retry_admin_passthru_for_path_error(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true, 0, 0); + attach_ctrlr_done, NULL, NULL, true, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4878,7 +4878,7 @@ test_retry_admin_passthru_by_count(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, 0, 0); + attach_ctrlr_done, NULL, NULL, false, 0, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4973,17 +4973,28 @@ test_retry_admin_passthru_by_count(void) static void test_check_multipath_params(void) { - /* 1st parameter is ctrlr_loss_timeout_sec and 2nd parameter is reconnect_delay_sec. */ - CU_ASSERT(bdev_nvme_check_multipath_params(-2, 1) == false); - CU_ASSERT(bdev_nvme_check_multipath_params(-1, 0) == false); - CU_ASSERT(bdev_nvme_check_multipath_params(1, 0) == false); - CU_ASSERT(bdev_nvme_check_multipath_params(1, 2) == false); - CU_ASSERT(bdev_nvme_check_multipath_params(0, 1) == false); - CU_ASSERT(bdev_nvme_check_multipath_params(-1, 1) == true); - CU_ASSERT(bdev_nvme_check_multipath_params(2, 2) == true); - CU_ASSERT(bdev_nvme_check_multipath_params(2, 1) == true); - CU_ASSERT(bdev_nvme_check_multipath_params(INT32_MAX, INT32_MAX) == true); - CU_ASSERT(bdev_nvme_check_multipath_params(-1, UINT32_MAX) == true); + /* 1st parameter is ctrlr_loss_timeout_sec, 2nd parameter is reconnect_delay_sec, and + * 3rd parameter is fast_io_fail_timeout_sec. 
+ */ + CU_ASSERT(bdev_nvme_check_multipath_params(-2, 1, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 0, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(1, 0, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(1, 2, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(0, 1, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 1, 0) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 2, 0) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 1, 0) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(INT32_MAX, INT32_MAX, 0) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, UINT32_MAX, 0) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(0, 0, 1) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 2, 1) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(3, 2, 4) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(3, 2, 1) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 1, 1) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 1, 2) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 1, 1) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(INT32_MAX, INT32_MAX, INT32_MAX) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, UINT32_MAX, UINT32_MAX) == true); } static void @@ -5016,7 +5027,7 @@ test_retry_io_if_ctrlr_is_resetting(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, -1, 1); + attach_ctrlr_done, NULL, NULL, false, -1, 1, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -5175,7 +5186,7 @@ test_retry_admin_passthru_if_ctrlr_is_resetting(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false, -1, 1); + attach_ctrlr_done, NULL, NULL, false, -1, 1, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -5518,6 +5529,170 @@ test_retry_failover_ctrlr(void) 
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); } +static void +test_fail_path(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct nvme_ns *nvme_ns; + struct spdk_bdev_io *bdev_io; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + struct nvme_io_path *io_path; + struct nvme_ctrlr_channel *ctrlr_ch; + int rc; + + /* The test scenario is the following. + * - We set fast_io_fail_timeout_sec to be smaller than ctrlr_loss_timeout_sec. + * - Resetting a ctrlr fails and reconnecting the ctrlr is repeated. + * - While reconnecting the ctrlr, an I/O is submitted and queued. + * - The I/O waits until the ctrlr is recovered but fast_io_fail_timeout_sec + * comes first. The queued I/O is failed. + * - After fast_io_fail_timeout_sec, any I/O is failed immediately. + * - Then ctrlr_loss_timeout_sec comes and the ctrlr is deleted.
+ */ + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false, 4, 1, 2); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); + CU_ASSERT(nvme_ns != NULL); + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(io_path != NULL); + + ctrlr_ch = io_path->ctrlr_ch; + SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL); + SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL); + + bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, ch); + ut_bdev_io_set_buf(bdev_io); + + + /* Resetting a ctrlr should fail and a reconnect timer should be registered. */ + ctrlr->fail_reset = true; + ctrlr->is_failed = true; + + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(ctrlr->is_failed == true); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr->is_failed == false); + CU_ASSERT(ctrlr_ch->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(nvme_ctrlr->reset_start_tsc != 0); + CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false); + + /* I/O should be queued. 
*/ + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + /* After a second, the I/O should be still queued and the ctrlr should be + * still recovering. + */ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_threads(); + + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr->is_failed == false); + CU_ASSERT(ctrlr_ch->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false); + CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false); + + /* After two seconds, fast_io_fail_timeout_sec should expire. */ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr->is_failed == false); + CU_ASSERT(ctrlr_ch->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false); + CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == true); + + /* Then within a second, pending I/O should be failed. */ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_threads(); + + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED); + CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list)); + + /* Another I/O submission should be failed immediately. */ + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED); + + /* After four seconds, ctrlr_loss_timeout_sec should expire and ctrlr should + * be deleted.
+ */ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_threads(); + + CU_ASSERT(nvme_ctrlr == nvme_ctrlr_get_by_name("nvme0")); + CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == true); + CU_ASSERT(nvme_ctrlr->destruct == true); + + spdk_put_io_channel(ch); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); + + free(bdev_io); +} + int main(int argc, const char **argv) { @@ -5564,6 +5739,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting); CU_ADD_TEST(suite, test_reconnect_ctrlr); CU_ADD_TEST(suite, test_retry_failover_ctrlr); + CU_ADD_TEST(suite, test_fail_path); CU_basic_set_mode(CU_BRM_VERBOSE);