diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 3277b68af..9ff35bcbb 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -2956,6 +2956,10 @@ multipathing. This is done by specifying the `name` parameter as an existing con path, the hostnqn, hostsvcid, hostaddr, prchk_reftag, and prchk_guard_arguments must not be specified and are assumed to have the same value as the existing path. +The parameters, `ctrlr_loss_timeout_sec` and `reconnect_delay_sec`, are mutually dependent. +If `reconnect_delay_sec` is non-zero, `ctrlr_loss_timeout_sec` has to be -1 or not less than `reconnect_delay_sec`. +If `reconnect_delay_sec` is zero, `ctrlr_loss_timeout_sec` has to be zero. + #### Result Array of names of newly created bdevs. @@ -2980,6 +2984,8 @@ ddgst | Optional | bool | Enable TCP data digest fabrics_connect_timeout_us | Optional | bool | Timeout for fabrics connect (in microseconds) multipath | Optional | string | Multipathing behavior: disable, failover, multipath. Default is failover. num_io_queues | Optional | uint32_t | The number of IO queues to request during initialization. Range: (0, UINT16_MAX + 1], Default is 1024. +ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect. +reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect. 
#### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index c84c9e190..461b57da6 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -470,6 +470,8 @@ nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr) { int rc; + spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer); + /* First, unregister the adminq poller, as the driver will poll adminq if necessary */ spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller); @@ -735,9 +737,16 @@ nvme_io_path_is_failed(struct nvme_io_path *io_path) return true; } - /* TODO: Regard path as unfailed only if the reset is throttoled. */ if (nvme_ctrlr->resetting) { - return true; + if (nvme_ctrlr->reconnect_delay_sec != 0) { + return false; + } else { + return true; + } + } + + if (nvme_ctrlr->reconnect_is_delayed) { + return false; } if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) { @@ -758,7 +767,7 @@ nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr) return false; } - if (nvme_ctrlr->resetting) { + if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) { return false; } @@ -1285,9 +1294,29 @@ bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove) } } +static bool +bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr) +{ + int32_t elapsed; + + if (nvme_ctrlr->ctrlr_loss_timeout_sec == 0 || + nvme_ctrlr->ctrlr_loss_timeout_sec == -1) { + return false; + } + + elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz(); + if (elapsed >= nvme_ctrlr->ctrlr_loss_timeout_sec) { + return true; + } else { + return false; + } +} + enum bdev_nvme_op_after_reset { OP_NONE, OP_COMPLETE_PENDING_DESTRUCT, + OP_DESTRUCT, + OP_DELAYED_RECONNECT, }; typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset; @@ -1298,9 +1327,60 @@ bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success) if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) { /* Complete pending destruct after reset completes. 
*/ return OP_COMPLETE_PENDING_DESTRUCT; + } else if (success || nvme_ctrlr->reconnect_delay_sec == 0) { + nvme_ctrlr->reset_start_tsc = 0; + return OP_NONE; + } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) { + return OP_DESTRUCT; + } else { + bdev_nvme_failover_trid(nvme_ctrlr, false); + return OP_DELAYED_RECONNECT; + } +} + +static int _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, bool hotplug); +static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr); + +static int +bdev_nvme_reconnect_delay_timer_expired(void *ctx) +{ + struct nvme_ctrlr *nvme_ctrlr = ctx; + + pthread_mutex_lock(&nvme_ctrlr->mutex); + + spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer); + + assert(nvme_ctrlr->reconnect_is_delayed == true); + nvme_ctrlr->reconnect_is_delayed = false; + + if (nvme_ctrlr->destruct) { + pthread_mutex_unlock(&nvme_ctrlr->mutex); + return SPDK_POLLER_BUSY; } - return OP_NONE; + assert(nvme_ctrlr->resetting == false); + nvme_ctrlr->resetting = true; + + pthread_mutex_unlock(&nvme_ctrlr->mutex); + + spdk_poller_resume(nvme_ctrlr->adminq_timer_poller); + + bdev_nvme_reconnect_ctrlr(nvme_ctrlr); + return SPDK_POLLER_BUSY; +} + +static void +bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr) +{ + spdk_poller_pause(nvme_ctrlr->adminq_timer_poller); + + assert(nvme_ctrlr->reconnect_is_delayed == false); + nvme_ctrlr->reconnect_is_delayed = true; + + assert(nvme_ctrlr->reconnect_delay_timer == NULL); + nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired, + nvme_ctrlr, + nvme_ctrlr->reconnect_delay_sec * SPDK_SEC_TO_USEC); } static void @@ -1345,6 +1425,13 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status) case OP_COMPLETE_PENDING_DESTRUCT: nvme_ctrlr_unregister(nvme_ctrlr); break; + case OP_DESTRUCT: + _bdev_nvme_delete(nvme_ctrlr, false); + break; + case OP_DELAYED_RECONNECT: + spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr); + 
bdev_nvme_start_reconnect_delay_timer(nvme_ctrlr); + break; default: break; } @@ -1411,11 +1498,13 @@ static int bdev_nvme_reconnect_ctrlr_poll(void *arg) { struct nvme_ctrlr *nvme_ctrlr = arg; - int rc; + int rc = -ETIMEDOUT; - rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr); - if (rc == -EAGAIN) { - return SPDK_POLLER_BUSY; + if (!bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) { + rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr); + if (rc == -EAGAIN) { + return SPDK_POLLER_BUSY; + } } spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller); @@ -1491,7 +1580,17 @@ bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr) return -EBUSY; } + if (nvme_ctrlr->reconnect_is_delayed) { + pthread_mutex_unlock(&nvme_ctrlr->mutex); + SPDK_NOTICELOG("Reconnect is already scheduled.\n"); + return -EBUSY; + } + nvme_ctrlr->resetting = true; + + assert(nvme_ctrlr->reset_start_tsc == 0); + nvme_ctrlr->reset_start_tsc = spdk_get_ticks(); + pthread_mutex_unlock(&nvme_ctrlr->mutex); spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset, nvme_ctrlr); @@ -1643,6 +1742,14 @@ bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove) bdev_nvme_failover_trid(nvme_ctrlr, remove); + if (nvme_ctrlr->reconnect_is_delayed) { + pthread_mutex_unlock(&nvme_ctrlr->mutex); + SPDK_NOTICELOG("Reconnect is already scheduled.\n"); + + /* We rely on the next reconnect for the failover. 
*/ + return 0; + } + nvme_ctrlr->resetting = true; pthread_mutex_unlock(&nvme_ctrlr->mutex); @@ -3261,6 +3368,8 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, if (ctx != NULL) { nvme_ctrlr->prchk_flags = ctx->prchk_flags; + nvme_ctrlr->ctrlr_loss_timeout_sec = ctx->ctrlr_loss_timeout_sec; + nvme_ctrlr->reconnect_delay_sec = ctx->reconnect_delay_sec; } nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr, @@ -3726,6 +3835,34 @@ bdev_nvme_async_poll(void *arg) return SPDK_POLLER_BUSY; } +static bool +bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec, + uint32_t reconnect_delay_sec) +{ + if (ctrlr_loss_timeout_sec < -1) { + SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n"); + return false; + } else if (ctrlr_loss_timeout_sec == -1) { + if (reconnect_delay_sec == 0) { + SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n"); + return false; + } + } else if (ctrlr_loss_timeout_sec != 0) { + if (reconnect_delay_sec == 0) { + SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n"); + return false; + } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) { + SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n"); + return false; + } + } else if (reconnect_delay_sec != 0) { + SPDK_ERRLOG("reconnect_delay_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n"); + return false; + } + + return true; +} + int bdev_nvme_create(struct spdk_nvme_transport_id *trid, const char *base_name, @@ -3735,7 +3872,9 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, spdk_bdev_create_nvme_fn cb_fn, void *cb_ctx, struct spdk_nvme_ctrlr_opts *opts, - bool multipath) + bool multipath, + int32_t ctrlr_loss_timeout_sec, + uint32_t reconnect_delay_sec) { struct nvme_probe_skip_entry *entry, *tmp; struct nvme_async_probe_ctx *ctx; @@ -3749,6 +3888,10 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, return -EEXIST; } + if 
(!bdev_nvme_check_multipath_params(ctrlr_loss_timeout_sec, reconnect_delay_sec)) { + return -EINVAL; + } + ctx = calloc(1, sizeof(*ctx)); if (!ctx) { return -ENOMEM; @@ -3760,6 +3903,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, ctx->cb_ctx = cb_ctx; ctx->prchk_flags = prchk_flags; ctx->trid = *trid; + ctx->ctrlr_loss_timeout_sec = ctrlr_loss_timeout_sec; + ctx->reconnect_delay_sec = reconnect_delay_sec; if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) { @@ -4085,7 +4230,7 @@ discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl, snprintf(new_ctx->opts.hostnqn, sizeof(new_ctx->opts.hostnqn), "%s", ctx->hostnqn); rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0, 0, discovery_attach_controller_done, new_ctx, - &new_ctx->opts, true); + &new_ctx->opts, true, 0, 0); if (rc == 0) { TAILQ_INSERT_TAIL(&ctx->ctrlr_ctxs, new_ctx, tailq); ctx->attach_in_progress++; @@ -5414,6 +5559,8 @@ nvme_ctrlr_config_json(struct spdk_json_write_ctx *w, (nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0); spdk_json_write_named_bool(w, "prchk_guard", (nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0); + spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->ctrlr_loss_timeout_sec); + spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->reconnect_delay_sec); spdk_json_write_object_end(w); diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index 0136d954d..e2b5ae446 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -3,6 +3,7 @@ * * Copyright (c) Intel Corporation. All rights reserved. * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -57,6 +58,8 @@ struct nvme_async_probe_ctx { const char **names; uint32_t count; uint32_t prchk_flags; + int32_t ctrlr_loss_timeout_sec; + uint32_t reconnect_delay_sec; struct spdk_poller *poller; struct spdk_nvme_transport_id trid; struct spdk_nvme_ctrlr_opts opts; @@ -106,6 +109,7 @@ struct nvme_ctrlr { int ref; uint32_t resetting : 1; + uint32_t reconnect_is_delayed : 1; uint32_t destruct : 1; uint32_t ana_log_page_updating : 1; /** @@ -127,6 +131,9 @@ struct nvme_ctrlr { struct spdk_poller *reset_detach_poller; struct spdk_nvme_detach_ctx *detach_ctx; + uint64_t reset_start_tsc; + struct spdk_poller *reconnect_delay_timer; + /** linked list pointer for device list */ TAILQ_ENTRY(nvme_ctrlr) tailq; struct nvme_bdev_ctrlr *nbdev_ctrlr; @@ -139,6 +146,9 @@ struct nvme_ctrlr { struct nvme_async_probe_ctx *probe_ctx; + uint32_t reconnect_delay_sec; + int32_t ctrlr_loss_timeout_sec; + pthread_mutex_t mutex; }; @@ -255,7 +265,9 @@ int bdev_nvme_create(struct spdk_nvme_transport_id *trid, spdk_bdev_create_nvme_fn cb_fn, void *cb_ctx, struct spdk_nvme_ctrlr_opts *opts, - bool multipath); + bool multipath, + int32_t ctrlr_loss_timeout_sec, + uint32_t reconnect_delay_sec); int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name, struct spdk_nvme_ctrlr_opts *opts, diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index ba536fe60..5c8647c03 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -3,6 +3,7 @@ * * Copyright (c) Intel Corporation. All rights reserved. * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -183,6 +184,8 @@ struct rpc_bdev_nvme_attach_controller { bool prchk_guard; uint64_t fabrics_connect_timeout_us; char *multipath; + int32_t ctrlr_loss_timeout_sec; + uint32_t reconnect_delay_sec; struct spdk_nvme_ctrlr_opts opts; }; @@ -222,6 +225,8 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_attach_controller_dec {"fabrics_connect_timeout_us", offsetof(struct rpc_bdev_nvme_attach_controller, opts.fabrics_connect_timeout_us), spdk_json_decode_uint64, true}, {"multipath", offsetof(struct rpc_bdev_nvme_attach_controller, multipath), spdk_json_decode_string, true}, {"num_io_queues", offsetof(struct rpc_bdev_nvme_attach_controller, opts.num_io_queues), spdk_json_decode_uint32, true}, + {"ctrlr_loss_timeout_sec", offsetof(struct rpc_bdev_nvme_attach_controller, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true}, + {"reconnect_delay_sec", offsetof(struct rpc_bdev_nvme_attach_controller, reconnect_delay_sec), spdk_json_decode_uint32, true}, }; #define NVME_MAX_BDEVS_PER_RPC 128 @@ -491,7 +496,8 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request, ctx->count = NVME_MAX_BDEVS_PER_RPC; rc = bdev_nvme_create(&trid, ctx->req.name, ctx->names, ctx->count, prchk_flags, rpc_bdev_nvme_attach_controller_done, ctx, &ctx->req.opts, - multipath); + multipath, ctx->req.ctrlr_loss_timeout_sec, + ctx->req.reconnect_delay_sec); if (rc) { spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); goto cleanup; diff --git a/scripts/rpc.py b/scripts/rpc.py index 709ec72c4..b5f146f87 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -538,7 +538,9 @@ if __name__ == "__main__": ddgst=args.ddgst, fabrics_timeout=args.fabrics_timeout, multipath=args.multipath, - num_io_queues=args.num_io_queues)) + num_io_queues=args.num_io_queues, + ctrlr_loss_timeout_sec=args.ctrlr_loss_timeout_sec, + 
reconnect_delay_sec=args.reconnect_delay_sec)) p = subparsers.add_parser('bdev_nvme_attach_controller', aliases=['construct_nvme_bdev'], help='Add bdevs with nvme backend') @@ -570,6 +572,20 @@ if __name__ == "__main__": p.add_argument('--fabrics-timeout', type=int, help='Fabrics connect timeout in microseconds') p.add_argument('-x', '--multipath', help='Set multipath behavior (disable, failover, multipath)') p.add_argument('--num-io-queues', type=int, help='Set the number of IO queues to request during initialization.') + p.add_argument('-l', '--ctrlr-loss-timeout-sec', + help="""Time to wait until ctrlr is reconnected before deleting ctrlr. + -1 means infinite reconnect retries. 0 means no reconnect retry. + If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero. + If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than + reconnect_delay_sec.""", + type=int) + p.add_argument('-o', '--reconnect-delay-sec', + help="""Time to delay a reconnect retry. + If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero. + If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero. 
+ If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and + not more than ctrlr_loss_timeout_sec.""", + type=int) p.set_defaults(func=bdev_nvme_attach_controller) def bdev_nvme_get_controllers(args): diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py index 4d50fcdd9..b9a37d953 100644 --- a/scripts/rpc/bdev.py +++ b/scripts/rpc/bdev.py @@ -523,7 +523,8 @@ def bdev_nvme_set_hotplug(client, enable, period_us=None): def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvcid=None, priority=None, subnqn=None, hostnqn=None, hostaddr=None, hostsvcid=None, prchk_reftag=None, prchk_guard=None, - hdgst=None, ddgst=None, fabrics_timeout=None, multipath=None, num_io_queues=None): + hdgst=None, ddgst=None, fabrics_timeout=None, multipath=None, num_io_queues=None, + ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None): """Construct block device for each NVMe namespace in the attached controller. Args: @@ -544,6 +545,16 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc fabrics_timeout: Fabrics connect timeout in us (optional) multipath: The behavior when multiple paths are created ("disable", "failover", or "multipath"; failover if not specified) num_io_queues: The number of IO queues to request during initialization. (optional) + ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr. + -1 means infinite reconnect retries. 0 means no reconnect retry. + If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero. + If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than reconnect_delay_sec. + (optional) + reconnect_delay_sec: Time to delay a reconnect retry. + If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero. + If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero. 
+ If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and not more than ctrlr_loss_timeout_sec. + (optional) Returns: Names of created block devices. @@ -594,6 +605,12 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc if num_io_queues: params['num_io_queues'] = num_io_queues + if ctrlr_loss_timeout_sec is not None: + params['ctrlr_loss_timeout_sec'] = ctrlr_loss_timeout_sec + + if reconnect_delay_sec is not None: + params['reconnect_delay_sec'] = reconnect_delay_sec + return client.call('bdev_nvme_attach_controller', params) diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index a6214779a..d3fb96a2d 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -3,7 +3,7 @@ * * Copyright (c) Intel Corporation. * All rights reserved. - * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1670,7 +1670,7 @@ test_pending_reset(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1801,7 +1801,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1816,7 +1816,7 @@ test_attach_ctrlr(void) g_ut_attach_ctrlr_status = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1844,7 +1844,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1880,7 +1880,7 @@ test_attach_ctrlr(void) g_ut_attach_bdev_count = 0; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -1934,7 +1934,7 @@ test_aer_cb(void) g_ut_attach_bdev_count = 3; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2140,7 +2140,7 @@ test_submit_nvme_cmd(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, 
false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2232,7 +2232,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2247,7 +2247,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2277,7 +2277,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr3 != NULL); rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2324,7 +2324,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2339,7 +2339,7 @@ test_add_remove_trid(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2397,7 +2397,7 @@ test_abort(void) set_thread(1); rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, -1, 1); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2522,6 +2522,37 @@ test_abort(void) set_thread(0); + /* If qpair is disconnected, it is freed and then reconnected via resetting + * the corresponding nvme_ctrlr. 
I/O should be queued if it is submitted + * while resetting the nvme_ctrlr. + */ + ctrlr_ch1->qpair->is_connected = false; + + poll_thread_times(0, 3); + + CU_ASSERT(ctrlr_ch1->qpair == NULL); + CU_ASSERT(nvme_ctrlr->resetting == true); + + write_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch1, write_io); + + CU_ASSERT(write_io->internal.in_submit_request == true); + CU_ASSERT(write_io == TAILQ_FIRST(&nbdev_ch1->retry_io_list)); + + /* Aborting the queued write request should succeed immediately. */ + abort_io->internal.ch = (struct spdk_bdev_channel *)ch1; + abort_io->u.abort.bio_to_abort = write_io; + abort_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch1, abort_io); + + CU_ASSERT(abort_io->internal.in_submit_request == false); + CU_ASSERT(abort_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(write_io->internal.in_submit_request == false); + CU_ASSERT(write_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED); + spdk_put_io_channel(ch1); set_thread(1); @@ -2615,7 +2646,7 @@ test_bdev_unregister(void) g_ut_attach_bdev_count = 2; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2731,7 +2762,7 @@ test_init_ana_log_page(void) g_ut_attach_bdev_count = 5; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2832,7 +2863,7 @@ test_reconnect_qpair(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -2964,7 +2995,8 @@ test_create_bdev_ctrlr(void) g_ut_attach_bdev_count = 0; rc = 
bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); + CU_ASSERT(rc == 0); spdk_delay_us(1000); poll_threads(); @@ -2985,7 +3017,7 @@ test_create_bdev_ctrlr(void) ctrlr2->cdata.cntlid = ctrlr1->cdata.cntlid; rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3003,7 +3035,7 @@ test_create_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3036,7 +3068,7 @@ test_create_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3046,7 +3078,7 @@ test_create_bdev_ctrlr(void) poll_threads(); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3136,7 +3168,7 @@ test_add_multi_ns_to_bdev(void) g_ut_attach_bdev_count = 3; rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3162,7 +3194,7 @@ test_add_multi_ns_to_bdev(void) g_ut_attach_bdev_count = 2; rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3239,7 +3271,7 @@ test_add_multi_ns_to_bdev(void) 
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3256,7 +3288,7 @@ test_add_multi_ns_to_bdev(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3342,7 +3374,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3357,7 +3389,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3405,7 +3437,7 @@ test_add_multi_io_paths_to_nbdev_ch(void) memset(&ctrlr3->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3484,7 +3516,7 @@ test_admin_path(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3499,7 +3531,7 @@ test_admin_path(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 
STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3613,7 +3645,7 @@ test_reset_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3626,7 +3658,7 @@ test_reset_bdev_ctrlr(void) SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -3915,7 +3947,7 @@ test_retry_io_if_ana_state_is_updating(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, -1, 1); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4042,7 +4074,7 @@ test_retry_io_for_io_path_error(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4137,7 +4169,7 @@ test_retry_io_for_io_path_error(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4245,7 +4277,7 @@ test_retry_io_count(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4431,7 +4463,7 @@ 
test_concurrent_read_ana_log_page(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4526,7 +4558,7 @@ test_retry_io_for_ana_error(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4672,7 +4704,7 @@ test_retry_admin_passthru_for_path_error(void) memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4754,7 +4786,7 @@ test_retry_admin_passthru_for_path_error(void) memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid)); rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, true); + attach_ctrlr_done, NULL, NULL, true, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4846,7 +4878,7 @@ test_retry_admin_passthru_by_count(void) g_ut_attach_bdev_count = 1; rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, - attach_ctrlr_done, NULL, NULL, false); + attach_ctrlr_done, NULL, NULL, false, 0, 0); CU_ASSERT(rc == 0); spdk_delay_us(1000); @@ -4938,6 +4970,554 @@ test_retry_admin_passthru_by_count(void) g_opts.bdev_retry_count = 0; } +static void +test_check_multipath_params(void) +{ + /* 1st parameter is ctrlr_loss_timeout_sec and 2nd parameter is reconnect_delay_sec. 
*/ + CU_ASSERT(bdev_nvme_check_multipath_params(-2, 1) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(1, 0) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(1, 2) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(0, 1) == false); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, 1) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 2) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(2, 1) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(INT32_MAX, INT32_MAX) == true); + CU_ASSERT(bdev_nvme_check_multipath_params(-1, UINT32_MAX) == true); +} + +static void +test_retry_io_if_ctrlr_is_resetting(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct nvme_ns *nvme_ns; + struct spdk_bdev_io *bdev_io1, *bdev_io2; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + struct nvme_io_path *io_path; + struct nvme_ctrlr_channel *ctrlr_ch; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false, -1, 1); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); + CU_ASSERT(nvme_ns != NULL); 
+ + bdev_io1 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL); + ut_bdev_io_set_buf(bdev_io1); + + bdev_io2 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL); + ut_bdev_io_set_buf(bdev_io2); + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(io_path != NULL); + + ctrlr_ch = io_path->ctrlr_ch; + SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL); + SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL); + + bdev_io1->internal.ch = (struct spdk_bdev_channel *)ch; + bdev_io2->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If qpair is connected, I/O should succeed. */ + bdev_io1->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io1); + CU_ASSERT(bdev_io1->internal.in_submit_request == true); + + poll_threads(); + CU_ASSERT(bdev_io1->internal.in_submit_request == false); + CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + /* If qpair is disconnected, it is freed and then reconnected via resetting + * the corresponding nvme_ctrlr. I/O should be queued if it is submitted + * while resetting the nvme_ctrlr.
+ */ + ctrlr_ch->qpair->is_connected = false; + ctrlr->is_failed = true; + + poll_thread_times(0, 4); + + CU_ASSERT(ctrlr_ch->qpair == NULL); + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(ctrlr->is_failed == false); + + bdev_io1->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io1); + + spdk_delay_us(1); + + bdev_io2->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io2); + + CU_ASSERT(bdev_io1->internal.in_submit_request == true); + CU_ASSERT(bdev_io2->internal.in_submit_request == true); + CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link)); + + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair != NULL); + CU_ASSERT(nvme_ctrlr->resetting == false); + + spdk_delay_us(999999); + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io1->internal.in_submit_request == true); + CU_ASSERT(bdev_io2->internal.in_submit_request == true); + CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io1->internal.in_submit_request == false); + CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + CU_ASSERT(bdev_io2->internal.in_submit_request == true); + CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + spdk_delay_us(1); + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io2->internal.in_submit_request == true); + CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io2->internal.in_submit_request == false); + CU_ASSERT(bdev_io2->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(bdev_io1); + free(bdev_io2); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 
0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); +} + +static void +test_retry_admin_passthru_if_ctrlr_is_resetting(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct spdk_bdev_io *admin_io; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + g_opts.bdev_retry_count = 1; + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false, -1, 1); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL); + admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + admin_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If ctrlr is available, admin passthrough should succeed. 
*/ + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + /* If ctrlr is resetting, admin passthrough request should be queued + * if it is submitted while resetting ctrlr. + */ + bdev_nvme_reset(nvme_ctrlr); + + poll_thread_times(0, 1); + + admin_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, admin_io); + + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + + spdk_delay_us(1000000); + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + CU_ASSERT(admin_io->internal.in_submit_request == true); + CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list)); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(admin_io->internal.in_submit_request == false); + CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(admin_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; +} + +static void +test_reconnect_ctrlr(void) +{ + struct spdk_nvme_transport_id trid = {}; + struct spdk_nvme_ctrlr ctrlr = {}; + struct nvme_ctrlr *nvme_ctrlr; + struct spdk_io_channel *ch1, *ch2; + struct nvme_ctrlr_channel *ctrlr_ch1, *ctrlr_ch2; + int rc; + + ut_init_trid(&trid); + TAILQ_INIT(&ctrlr.active_io_qpairs); + 
set_thread(0); + + rc = nvme_ctrlr_create(&ctrlr, "nvme0", &trid, NULL); + CU_ASSERT(rc == 0); + + nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); + + nvme_ctrlr->ctrlr_loss_timeout_sec = 2; + nvme_ctrlr->reconnect_delay_sec = 1; + + ch1 = spdk_get_io_channel(nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(ch1 != NULL); + + ctrlr_ch1 = spdk_io_channel_get_ctx(ch1); + CU_ASSERT(ctrlr_ch1->qpair != NULL); + + set_thread(1); + + ch2 = spdk_get_io_channel(nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(ch2 != NULL); + + ctrlr_ch2 = spdk_io_channel_get_ctx(ch2); + + /* Reset starts from thread 1. */ + set_thread(1); + + /* The reset should fail and a reconnect timer should be registered. */ + ctrlr.fail_reset = true; + ctrlr.is_failed = true; + + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(ctrlr.is_failed == true); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr.is_failed == false); + CU_ASSERT(ctrlr_ch1->qpair == NULL); + CU_ASSERT(ctrlr_ch2->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true); + + /* Then a reconnect retry should succeed. */ + ctrlr.fail_reset = false; + + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_thread_times(0, 1); + + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr_ch1->qpair != NULL); + CU_ASSERT(ctrlr_ch2->qpair != NULL); + CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false); + + /* The reset should fail and a reconnect timer should be registered.
*/ + ctrlr.fail_reset = true; + ctrlr.is_failed = true; + + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(ctrlr.is_failed == true); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr.is_failed == false); + CU_ASSERT(ctrlr_ch1->qpair == NULL); + CU_ASSERT(ctrlr_ch2->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true); + + /* Then a reconnect retry should still fail. */ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_thread_times(0, 1); + + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr.is_failed == false); + CU_ASSERT(ctrlr_ch1->qpair == NULL); + CU_ASSERT(ctrlr_ch2->qpair == NULL); + CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false); + + /* Then a reconnect retry should still fail and the ctrlr should be deleted. 
*/ + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_threads(); + + CU_ASSERT(nvme_ctrlr == nvme_ctrlr_get_by_name("nvme0")); + CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == true); + CU_ASSERT(nvme_ctrlr->destruct == true); + + spdk_put_io_channel(ch2); + + set_thread(0); + + spdk_put_io_channel(ch1); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); +} + +static struct nvme_path_id * +ut_get_path_id_by_trid(struct nvme_ctrlr *nvme_ctrlr, + const struct spdk_nvme_transport_id *trid) +{ + struct nvme_path_id *p; + + TAILQ_FOREACH(p, &nvme_ctrlr->trids, link) { + if (spdk_nvme_transport_id_compare(&p->trid, trid) == 0) { + break; + } + } + + return p; +} + +static void +test_retry_failover_ctrlr(void) +{ + struct spdk_nvme_transport_id trid1 = {}, trid2 = {}, trid3 = {}; + struct spdk_nvme_ctrlr ctrlr = {}; + struct nvme_ctrlr *nvme_ctrlr = NULL; + struct nvme_path_id *path_id1, *path_id2, *path_id3; + struct spdk_io_channel *ch; + struct nvme_ctrlr_channel *ctrlr_ch; + int rc; + + ut_init_trid(&trid1); + ut_init_trid2(&trid2); + ut_init_trid3(&trid3); + TAILQ_INIT(&ctrlr.active_io_qpairs); + + set_thread(0); + + rc = nvme_ctrlr_create(&ctrlr, "nvme0", &trid1, NULL); + CU_ASSERT(rc == 0); + + nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); + + nvme_ctrlr->ctrlr_loss_timeout_sec = -1; + nvme_ctrlr->reconnect_delay_sec = 1; + + rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, &ctrlr, &trid2); + CU_ASSERT(rc == 0); + + rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, &ctrlr, &trid3); + CU_ASSERT(rc == 0); + + ch = spdk_get_io_channel(nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + ctrlr_ch = spdk_io_channel_get_ctx(ch); + + path_id1 = ut_get_path_id_by_trid(nvme_ctrlr, &trid1); + SPDK_CU_ASSERT_FATAL(path_id1 != NULL); + CU_ASSERT(path_id1->is_failed == false); + CU_ASSERT(path_id1 == nvme_ctrlr->active_path_id); + + /* If reset failed and reconnect is 
scheduled, path_id is switched from trid1 to trid2. */ + ctrlr.fail_reset = true; + ctrlr.is_failed = true; + + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + + poll_threads(); + + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr.is_failed == false); + CU_ASSERT(ctrlr_ch->qpair == NULL); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL); + CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true); + + CU_ASSERT(path_id1->is_failed == true); + + path_id2 = ut_get_path_id_by_trid(nvme_ctrlr, &trid2); + SPDK_CU_ASSERT_FATAL(path_id2 != NULL); + CU_ASSERT(path_id2->is_failed == false); + CU_ASSERT(path_id2 == nvme_ctrlr->active_path_id); + + /* If we remove trid2 while reconnect is scheduled, trid2 is removed and path_id is + * switched to trid3 but reset is not started. + */ + rc = bdev_nvme_failover(nvme_ctrlr, true); + CU_ASSERT(rc == 0); + + CU_ASSERT(ut_get_path_id_by_trid(nvme_ctrlr, &trid2) == NULL); + + path_id3 = ut_get_path_id_by_trid(nvme_ctrlr, &trid3); + SPDK_CU_ASSERT_FATAL(path_id3 != NULL); + CU_ASSERT(path_id3->is_failed == false); + CU_ASSERT(path_id3 == nvme_ctrlr->active_path_id); + + CU_ASSERT(nvme_ctrlr->resetting == false); + + /* If reconnect succeeds, trid3 should be the active path_id */ + ctrlr.fail_reset = false; + + spdk_delay_us(SPDK_SEC_TO_USEC); + poll_thread_times(0, 1); + + CU_ASSERT(nvme_ctrlr->resetting == true); + CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL); + + poll_threads(); + + CU_ASSERT(path_id3->is_failed == false); + CU_ASSERT(path_id3 == nvme_ctrlr->active_path_id); + CU_ASSERT(nvme_ctrlr->resetting == false); + CU_ASSERT(ctrlr_ch->qpair != NULL); + CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); +} + int main(int argc, const char **argv) { @@ -4979,6 
+5559,11 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_for_ana_error); CU_ADD_TEST(suite, test_retry_admin_passthru_for_path_error); CU_ADD_TEST(suite, test_retry_admin_passthru_by_count); + CU_ADD_TEST(suite, test_check_multipath_params); + CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting); + CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting); + CU_ADD_TEST(suite, test_reconnect_ctrlr); + CU_ADD_TEST(suite, test_retry_failover_ctrlr); CU_basic_set_mode(CU_BRM_VERBOSE);