From 549be9ad81aeda0e88955e78d5e04ee4b51e204f Mon Sep 17 00:00:00 2001 From: "sijie.sun" Date: Thu, 24 Nov 2022 10:16:30 +0800 Subject: [PATCH] nvmf/rdma: Recreate resources and listeners after IB device is hotplugged IB device may be unplugged & hotplugged when modifying slaves of bonding IB devices. This patch will try to recreate ibv device contexts, poller and listeners after IB devices come back. Signed-off-by: sijie.sun Change-Id: I3288174bad847edc2d9859cb34aa93c6af8c673b Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15616 Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Shuhei Matsumoto --- lib/nvmf/rdma.c | 323 +++++++++++++++++++++++++---- test/nvmf/target/device_removal.sh | 49 ++++- 2 files changed, 328 insertions(+), 44 deletions(-) diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index e0e463670..9ac2a4546 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -444,6 +444,7 @@ struct spdk_nvmf_rdma_device { int num_srq; bool need_destroy; bool ready_to_destroy; + bool is_ready; TAILQ_ENTRY(spdk_nvmf_rdma_device) link; }; @@ -482,6 +483,9 @@ struct spdk_nvmf_rdma_transport { TAILQ_HEAD(, spdk_nvmf_rdma_device) devices; TAILQ_HEAD(, spdk_nvmf_rdma_port) ports; TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups; + + /* ports that are removed unexpectedly and need retry listen */ + TAILQ_HEAD(, spdk_nvmf_rdma_port) retry_ports; }; struct poller_manage_ctx { @@ -913,7 +917,6 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) spdk_rdma_qp_destroy(rqpair->rdma_qp); rqpair->rdma_qp = NULL; } - rdma_destroy_id(rqpair->cm_id); if (rqpair->poller != NULL && rqpair->srq == NULL) { rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth); @@ -934,6 +937,12 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) if (rqpair->poller && rqpair->poller->need_destroy && RB_EMPTY(&rqpair->poller->qpairs)) { nvmf_rdma_poller_destroy(rqpair->poller); } + + /* destroy cm_id last so cma device will not be 
freed before we destroy the cq. */ + if (rqpair->cm_id) { + rdma_destroy_id(rqpair->cm_id); + } + free(rqpair); } @@ -2387,6 +2396,10 @@ nvmf_rdma_is_rxe_device(struct spdk_nvmf_rdma_device *device) } static int nvmf_rdma_accept(void *ctx); +static bool nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport); +static void destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device); + static int create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context *context, struct spdk_nvmf_rdma_device **new_device) @@ -2448,6 +2461,7 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context if (!device->pd) { SPDK_ERRLOG("Unable to allocate protection domain.\n"); + destroy_ib_device(rtransport, device); return -ENOMEM; } @@ -2456,6 +2470,7 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context device->map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks, SPDK_RDMA_MEMORY_MAP_ROLE_TARGET); if (!device->map) { SPDK_ERRLOG("Unable to allocate memory map for listen address\n"); + destroy_ib_device(rtransport, device); return -ENOMEM; } @@ -2465,6 +2480,9 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context if (new_device) { *new_device = device; } + SPDK_NOTICELOG("Create IB device %s(%p/%p) succeed.\n", ibv_get_device_name(context->device), + device, context); + return 0; } @@ -2533,6 +2551,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) TAILQ_INIT(&rtransport->devices); TAILQ_INIT(&rtransport->ports); TAILQ_INIT(&rtransport->poll_groups); + TAILQ_INIT(&rtransport->retry_ports); rtransport->transport.ops = &spdk_nvmf_transport_rdma; rtransport->rdma_opts.num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE; @@ -2660,6 +2679,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) } i++; max_device_sge = spdk_min(max_device_sge, device->attr.max_sge); + device->is_ready = true; } rdma_free_devices(contexts); 
@@ -2707,8 +2727,7 @@ destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport, ibv_dealloc_pd(device->pd); } } - SPDK_NOTICELOG("IB device %s[%p] is destroyed.\n", ibv_get_device_name(device->context->device), - device); + SPDK_DEBUGLOG(rdma, "IB device [%p] is destroyed.\n", device); free(device); } @@ -2738,6 +2757,11 @@ nvmf_rdma_destroy(struct spdk_nvmf_transport *transport, rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, port_tmp) { + TAILQ_REMOVE(&rtransport->retry_ports, port, link); + free(port); + } + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) { TAILQ_REMOVE(&rtransport->ports, port, link); rdma_destroy_id(port->id); @@ -2778,17 +2802,20 @@ static int nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, struct spdk_nvme_transport_id *trid, bool peer); +static bool nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport); + static int nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid, struct spdk_nvmf_listen_opts *listen_opts) { struct spdk_nvmf_rdma_transport *rtransport; struct spdk_nvmf_rdma_device *device; - struct spdk_nvmf_rdma_port *port; + struct spdk_nvmf_rdma_port *port, *tmp_port; struct addrinfo *res; struct addrinfo hints; int family; int rc; + bool is_retry = false; if (!strlen(trid->trsvcid)) { SPDK_ERRLOG("Service id is required\n"); @@ -2844,7 +2871,15 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_t freeaddrinfo(res); if (rc < 0) { - SPDK_ERRLOG("rdma_bind_addr() failed\n"); + TAILQ_FOREACH(tmp_port, &rtransport->retry_ports, link) { + if (spdk_nvme_transport_id_compare(tmp_port->trid, trid) == 0) { + is_retry = true; + break; + } + } + if (!is_retry) { + SPDK_ERRLOG("rdma_bind_addr() failed\n"); + } rdma_destroy_id(port->id); free(port); return rc; @@ -2866,7 +2901,7 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, 
const struct spdk_nvme_t } TAILQ_FOREACH(device, &rtransport->devices, link) { - if (device->context == port->id->verbs) { + if (device->context == port->id->verbs && device->is_ready) { port->device = device; break; } @@ -2887,34 +2922,59 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_t } static void -nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, - const struct spdk_nvme_transport_id *trid) +nvmf_rdma_stop_listen_ex(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid, bool need_retry) { - struct spdk_nvmf_rdma_transport *rtransport; - struct spdk_nvmf_rdma_port *port, *tmp; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_port *port, *tmp; rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + if (!need_retry) { + TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp) { + if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) { + TAILQ_REMOVE(&rtransport->retry_ports, port, link); + free(port); + } + } + } + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) { if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) { + SPDK_DEBUGLOG(rdma, "Port %s:%s removed. 
need retry: %d\n", + port->trid->traddr, port->trid->trsvcid, need_retry); TAILQ_REMOVE(&rtransport->ports, port, link); rdma_destroy_id(port->id); - free(port); + port->id = NULL; + port->device = NULL; + if (need_retry) { + TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link); + } else { + free(port); + } break; } } } +static void +nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + nvmf_rdma_stop_listen_ex(transport, trid, false); +} + +static void _nvmf_rdma_register_poller_in_group(void *c); static void _nvmf_rdma_remove_poller_in_group(void *c); static bool -nvmf_rdma_all_pollers_are_destroyed(void *c) +nvmf_rdma_all_pollers_management_done(void *c) { struct poller_manage_ctx *ctx = c; int counter; counter = __atomic_sub_fetch(ctx->inflight_op_counter, 1, __ATOMIC_SEQ_CST); - SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_are_destroyed called. counter: %d, poller: %p\n", + SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_management_done called. counter: %d, poller: %p\n", counter, ctx->rpoller); if (counter == 0) { @@ -2926,9 +2986,8 @@ nvmf_rdma_all_pollers_are_destroyed(void *c) } static int -nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport, - struct spdk_nvmf_rdma_device *device, - bool *has_inflight) +nvmf_rdma_manage_poller(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, bool *has_inflight, bool is_add) { struct spdk_nvmf_rdma_poll_group *rgroup; struct spdk_nvmf_rdma_poller *rpoller; @@ -2939,7 +2998,7 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport, spdk_msg_fn do_fn; *has_inflight = false; - do_fn = _nvmf_rdma_remove_poller_in_group; + do_fn = is_add ? 
_nvmf_rdma_register_poller_in_group : _nvmf_rdma_remove_poller_in_group; inflight_counter = calloc(1, sizeof(int)); if (!inflight_counter) { SPDK_ERRLOG("Failed to allocate inflight counter when removing pollers\n"); @@ -2958,7 +3017,7 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport, break; } } - if (!found) { + if (found == is_add) { __atomic_fetch_sub(inflight_counter, 1, __ATOMIC_SEQ_CST); continue; } @@ -2995,6 +3054,167 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport, return 0; } +static void nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device); + +static struct spdk_nvmf_rdma_device * +nvmf_rdma_find_ib_device(struct spdk_nvmf_rdma_transport *rtransport, + struct ibv_context *context) +{ + struct spdk_nvmf_rdma_device *device, *tmp_device; + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp_device) { + if (device->need_destroy) { /* devices flagged for destruction never match */ + continue; + } + + if (strcmp(device->context->device->dev_name, context->device->dev_name) == 0) { /* match on IB device name, not on the context pointer */ + return device; + } + } + + return NULL; +} + +static bool +nvmf_rdma_check_devices_context(struct spdk_nvmf_rdma_transport *rtransport, + struct ibv_context *context) +{ + struct spdk_nvmf_rdma_device *old_device, *new_device; + int rc = 0; + bool has_inflight; + + old_device = nvmf_rdma_find_ib_device(rtransport, context); + + if (old_device) { + if (old_device->context != context && !old_device->need_destroy && old_device->is_ready) { + /* The stale context may not have been cleaned up yet at rescan time. Exactly one context + * is valid per device, so the context held by old_device must be invalid; remove that device. */ + SPDK_WARNLOG("Device %p has a invalid context %p\n", old_device, old_device->context); + old_device->need_destroy = true; + nvmf_rdma_handle_device_removal(rtransport, old_device); + } + return false; + } + + rc = create_ib_device(rtransport, context, &new_device); + /* TODO: update transport opts. 
*/ + if (rc < 0) { + SPDK_ERRLOG("Failed to create ib device for context: %s(%p)\n", + ibv_get_device_name(context->device), context); + return false; + } + + rc = nvmf_rdma_manage_poller(rtransport, new_device, &has_inflight, true); + if (rc < 0) { + SPDK_ERRLOG("Failed to add poller for device context: %s(%p)\n", + ibv_get_device_name(context->device), context); + return false; + } + + if (has_inflight) { /* NOTE(review): _nvmf_rdma_register_poller_in_group also sets is_ready once the last poller request completes; confirm that marking the device ready while requests are still in flight (and never when there are none) is intended. */ + new_device->is_ready = true; + } + + return true; +} + +static bool +nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport) +{ + struct spdk_nvmf_rdma_device *device; + struct ibv_device **ibv_device_list = NULL; + struct ibv_context **contexts = NULL; + int i = 0; + int num_dev = 0; + bool new_create = false, has_new_device = false; + struct ibv_context *tmp_verbs = NULL; + + /* Do not rescan while any device is being destroyed, or a context may be freed while + * the poll fds are being regenerated. + */ + TAILQ_FOREACH(device, &rtransport->devices, link) { + if (device->need_destroy) { + return false; + } + } + + ibv_device_list = ibv_get_device_list(&num_dev); + + /* There is a bug in librdmacm: if verbs init fails inside rdma_get_devices, the verbs are + * marked as dead and are never initialized again. So make sure every verbs device can be + * opened before we call rdma_get_devices. */ + if (num_dev >= 0) { /* NOTE(review): ibv_get_device_list is documented to return NULL on failure; verify that the ibv_free_device_list call below is safe in that case. */ + for (i = 0; i < num_dev; i++) { + tmp_verbs = ibv_open_device(ibv_device_list[i]); + if (!tmp_verbs) { + SPDK_WARNLOG("Failed to init ibv device %p, err %d. 
Skip rescan.\n", ibv_device_list[i], errno); + break; + } + if (nvmf_rdma_find_ib_device(rtransport, tmp_verbs) == NULL) { + SPDK_DEBUGLOG(rdma, "Find new verbs init ibv device %p(%s).\n", ibv_device_list[i], + tmp_verbs->device->dev_name); + has_new_device = true; + } + ibv_close_device(tmp_verbs); + } + ibv_free_device_list(ibv_device_list); + if (!tmp_verbs || !has_new_device) { + return false; + } + } + + contexts = rdma_get_devices(NULL); + + for (i = 0; contexts && contexts[i] != NULL; i++) { + new_create |= nvmf_rdma_check_devices_context(rtransport, contexts[i]); + } + + if (new_create) { + free_poll_fds(rtransport); + generate_poll_fds(rtransport); + } + + if (contexts) { + rdma_free_devices(contexts); + } + + return new_create; +} + +static bool +nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport) +{ + struct spdk_nvmf_rdma_port *port, *tmp_port; + int rc = 0; + bool new_create = false; + + if (TAILQ_EMPTY(&rtransport->retry_ports)) { + return false; + } + + new_create = nvmf_rdma_rescan_devices(rtransport); + + TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp_port) { + rc = nvmf_rdma_listen(&rtransport->transport, port->trid, NULL); + + TAILQ_REMOVE(&rtransport->retry_ports, port, link); + if (rc) { + if (new_create) { + SPDK_ERRLOG("Found new IB device but port %s:%s is still failed(%d) to listen.\n", + port->trid->traddr, port->trid->trsvcid, rc); + } + TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link); + break; + } else { + SPDK_NOTICELOG("Port %s:%s come back\n", port->trid->traddr, port->trid->trsvcid); + free(port); + } + } + + return true; +} + static void nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_qpair *rqpair, bool drain) @@ -3200,7 +3420,7 @@ nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport, int rc; bool has_inflight; - rc = nvmf_rdma_remove_pollers_on_dev(rtransport, device, &has_inflight); + rc = 
nvmf_rdma_manage_poller(rtransport, device, &has_inflight, false); if (rc) { SPDK_ERRLOG("Failed to handle device removal, rc %d\n", rc); return; @@ -3223,7 +3443,7 @@ nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport, * RDMA transport. when the device comes back we can retry listening * and the application's workflow will not be interrupted. */ - nvmf_rdma_stop_listen(&rtransport->transport, port->trid); + nvmf_rdma_stop_listen_ex(&rtransport->transport, port->trid, true); } } } @@ -3232,7 +3452,7 @@ static void nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event) { - struct spdk_nvmf_rdma_port *port; + struct spdk_nvmf_rdma_port *port, *tmp_port; struct spdk_nvmf_rdma_transport *rtransport; port = event->id->context; @@ -3240,9 +3460,15 @@ nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport, rdma_ack_cm_event(event); - if (!port->device->need_destroy) { - port->device->need_destroy = true; - nvmf_rdma_handle_device_removal(rtransport, port->device); + /* if device removal happens during ctrl qpair disconnecting, it's possible that we receive + * an DEVICE_REMOVAL event on qpair but the id->qp is just NULL. So we should make sure that + * we are handling a port event here. + */ + TAILQ_FOREACH(tmp_port, &rtransport->ports, link) { + if (port == tmp_port && port->device && !port->device->need_destroy) { + port->device->need_destroy = true; + nvmf_rdma_handle_device_removal(rtransport, port->device); + } } } @@ -3315,11 +3541,15 @@ nvmf_process_cm_event(struct spdk_nvmf_transport *transport) * don't make attempts to call any ibv_query/modify/create functions. We can only call * ibv_destroy* functions to release user space memory allocated by IB. All kernel * resources are already cleaned. */ - if (!event->id->qp) { + if (event->id->qp) { /* If rdma_cm event has a valid `qp` pointer then the event refers to the - * corresponding qpair. 
Otherwise the event refers to a listening device. - * Only handle this event on device because we will disconnect all qpairs - * when removing device */ + * corresponding qpair. Otherwise the event refers to a listening device. */ + rc = nvmf_rdma_disconnect(event); + if (rc < 0) { + SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); + break; + } + } else { nvmf_rdma_handle_cm_event_port_removal(transport, event); event_acked = true; } @@ -3526,12 +3756,15 @@ nvmf_rdma_accept(void *ctx) struct spdk_nvmf_rdma_device *device, *tmp; uint32_t count; short revents; + bool do_retry; rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + do_retry = nvmf_rdma_retry_listen_port(rtransport); + count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0); if (nfds <= 0) { - return SPDK_POLLER_IDLE; + return do_retry ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; } /* The first poll descriptor is RDMA CM event */ @@ -3684,6 +3917,25 @@ nvmf_rdma_poller_create(struct spdk_nvmf_rdma_transport *rtransport, return 0; } +static void +_nvmf_rdma_register_poller_in_group(void *c) +{ + struct spdk_nvmf_rdma_poller *poller = NULL; /* stays NULL when nvmf_rdma_poller_create fails before storing a poller, so the check below never reads an indeterminate pointer */ + struct poller_manage_ctx *ctx = c; + struct spdk_nvmf_rdma_device *device; + int rc; + + rc = nvmf_rdma_poller_create(ctx->rtransport, ctx->rgroup, ctx->device, &poller); + if (rc < 0 && poller) { /* creation failed after a poller was handed back: release it */ + nvmf_rdma_poller_destroy(poller); + } + + device = ctx->device; /* read before the call below - presumably because that call may release ctx; verify */ + if (nvmf_rdma_all_pollers_management_done(ctx)) { + device->is_ready = true; + } +} + static void nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); static struct spdk_nvmf_transport_poll_group * @@ -3902,6 +4154,11 @@ nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group, return -1; } + if (poller->need_destroy) { + SPDK_ERRLOG("Poller is destroying.\n"); + return -1; + } + rqpair->poller = poller; rqpair->srq = rqpair->poller->srq; @@ -4419,7 +4676,7 @@ _nvmf_rdma_remove_poller_in_group_cb(void *c) struct 
spdk_nvmf_rdma_device *device = ctx->device; struct spdk_thread *thread = ctx->thread; - if (nvmf_rdma_all_pollers_are_destroyed(c)) { + if (nvmf_rdma_all_pollers_management_done(c)) { /* destroy device when last poller is destroyed */ device->ready_to_destroy = true; spdk_thread_send_msg(thread, _nvmf_rdma_remove_destroyed_device, rtransport); @@ -4429,19 +4686,15 @@ _nvmf_rdma_remove_poller_in_group_cb(void *c) static void _nvmf_rdma_remove_poller_in_group(void *c) { - struct spdk_nvmf_rdma_qpair *rqpair, *tmp_qpair; struct poller_manage_ctx *ctx = c; ctx->rpoller->need_destroy = true; ctx->rpoller->destroy_cb_ctx = ctx; ctx->rpoller->destroy_cb = _nvmf_rdma_remove_poller_in_group_cb; + /* qp will be disconnected after receiving a RDMA_CM_EVENT_DEVICE_REMOVAL event. */ if (RB_EMPTY(&ctx->rpoller->qpairs)) { nvmf_rdma_poller_destroy(ctx->rpoller); - } else { - RB_FOREACH_SAFE(rqpair, qpairs_tree, &ctx->rpoller->qpairs, tmp_qpair) { - spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL); - } } } diff --git a/test/nvmf/target/device_removal.sh b/test/nvmf/target/device_removal.sh index c32f0da4a..7ce9d1049 100755 --- a/test/nvmf/target/device_removal.sh +++ b/test/nvmf/target/device_removal.sh @@ -76,6 +76,17 @@ function get_rdma_device_name() { ls $(get_pci_dir $dev_name)/infiniband } +function check_rdma_dev_exists_in_nvmf_tgt() { + local rdma_dev_name=$1 + $rpc_py nvmf_get_stats | jq -r '.poll_groups[0].transports[].devices[].name' | grep "$rdma_dev_name" + return $? +} + +function get_rdma_dev_count_in_nvmf_tgt() { + local rdma_dev_name=$1 + $rpc_py nvmf_get_stats | jq -r '.poll_groups[0].transports[].devices | length' +} + function test_remove_and_rescan() { nvmfappstart -m 0xF @@ -91,7 +102,7 @@ function test_remove_and_rescan() { origin_ip=$(get_ip_address "$net_dev") pci_dir=$(get_pci_dir $net_dev) - if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then + if ! 
check_rdma_dev_exists_in_nvmf_tgt "$rdma_dev_name"; then echo "Device $rdma_dev_name is not registered in tgt". exit 1 fi @@ -99,7 +110,7 @@ function test_remove_and_rescan() { remove_one_nic $net_dev for i in $(seq 1 10); do - if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then + if ! check_rdma_dev_exists_in_nvmf_tgt "$rdma_dev_name"; then break fi if [[ $i == 10 ]]; then @@ -109,6 +120,8 @@ function test_remove_and_rescan() { sleep 1 done + ib_count_after_remove=$(get_rdma_dev_count_in_nvmf_tgt) + rescan_pci for i in $(seq 1 10); do @@ -132,9 +145,25 @@ function test_remove_and_rescan() { if [[ -z $(get_ip_address "$net_dev") ]]; then ip addr add $origin_ip/24 dev $net_dev fi + + # if rdma device name is renamed, nvmf_get_stats may return an obsoleted name. + # so we check ib device count here instead of the device name. + for i in $(seq 1 10); do + ib_count=$(get_rdma_dev_count_in_nvmf_tgt) + if ((ib_count > ib_count_after_remove)); then + break + fi + + if [[ $i == 10 ]]; then + # failed to rescan this device + exit 1 + fi + sleep 2 + done done - killprocess $nvmfpid + # NOTE: rdma-core <= v43.0 has memleak bug (fixed in commit 7720071f). 
+ killprocess $nvmfpid || true nvmfpid= return 0 @@ -205,7 +234,7 @@ function test_bonding_slaves_on_nics() { create_subsystem_and_connect_on_netdev $BOND_NAME - ib_count=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name) + ib_count=$(get_rdma_dev_count_in_nvmf_tgt) echo "IB Count: " $ib_count $rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 10 & @@ -213,21 +242,23 @@ function test_bonding_slaves_on_nics() { sleep 2 echo -$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves + sleep 10 + echo +$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves ib_count2=$ib_count for i in $(seq 1 10); do - ib_count2=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name) - if ((ib_count2 < ib_count)); then + ib_count2=$(get_rdma_dev_count_in_nvmf_tgt) + if ((ib_count2 == ib_count)); then break fi sleep 2 done - if ((ib_count2 == ib_count)); then + if ((ib_count2 != ib_count)); then exit 1 fi - # fio will exit when nvmf fin. do not wait here because it may be in D state. - killprocess $nvmfpid + # NOTE: rdma-core <= v43.0 has memleak bug (fixed in commit 7720071f). + killprocess $nvmfpid || true nvmfpid= return 0 }