From 8ddc5cd4a7f553a9ee981ced391820100e42d889 Mon Sep 17 00:00:00 2001 From: "sijie.sun" Date: Thu, 24 Nov 2022 10:14:40 +0800 Subject: [PATCH] nvmf/rdma: Destroy all related resources after IB device removed When IBV_EVENT_DEVICE_FATAL & RDMA_CM_EVENT_DEVICE_REMOVAL occurs, destroy all userspace resources such as qp, poller and ibv_context. Signed-off-by: sijie.sun Change-Id: Ie4832e4804eb572d6ec3bdc44fb7f9339f443d7e Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15615 Reviewed-by: Shuhei Matsumoto Reviewed-by: Ben Walker Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk --- lib/nvmf/rdma.c | 286 ++++++++++++++++++++++++++--- test/nvmf/nvmf.sh | 2 + test/nvmf/target/device_removal.sh | 251 +++++++++++++++++++++++++ 3 files changed, 517 insertions(+), 22 deletions(-) create mode 100755 test/nvmf/target/device_removal.sh diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index f893c921d..e0e463670 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -283,6 +283,8 @@ struct spdk_nvmf_rdma_resources { typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair); +typedef void (*spdk_poller_destroy_cb)(void *ctx); + struct spdk_nvmf_rdma_ibv_event_ctx { struct spdk_nvmf_rdma_qpair *rqpair; spdk_nvmf_rdma_qpair_ibv_event cb_fn; @@ -395,6 +397,7 @@ struct spdk_nvmf_rdma_poller { /* The maximum number of I/O outstanding on the shared receive queue at one time */ uint16_t max_srq_depth; + bool need_destroy; /* Shared receive queue */ struct spdk_rdma_srq *srq; @@ -402,6 +405,9 @@ struct spdk_nvmf_rdma_poller { struct spdk_nvmf_rdma_resources *resources; struct spdk_nvmf_rdma_poller_stat stat; + spdk_poller_destroy_cb destroy_cb; + void *destroy_cb_ctx; + RB_HEAD(qpairs_tree, spdk_nvmf_rdma_qpair) qpairs; STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_recv; @@ -436,6 +442,8 @@ struct spdk_nvmf_rdma_device { struct ibv_pd *pd; int num_srq; + bool need_destroy; + bool ready_to_destroy; TAILQ_ENTRY(spdk_nvmf_rdma_device) link; }; @@ 
-476,6 +484,16 @@ struct spdk_nvmf_rdma_transport { TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups; }; +struct poller_manage_ctx { + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *rpoller; + struct spdk_nvmf_rdma_device *device; + + struct spdk_thread *thread; + volatile int *inflight_op_counter; +}; + static const struct spdk_json_object_decoder rdma_transport_opts_decoder[] = { { "num_cqe", offsetof(struct rdma_transport_opts, num_cqe), @@ -516,6 +534,8 @@ static void _poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport, static void _poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_poller *rpoller); +static void _nvmf_rdma_remove_destroyed_device(void *c); + static inline int nvmf_rdma_check_ibv_state(enum ibv_qp_state state) { @@ -831,6 +851,8 @@ nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair) } } +static void nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller); + static void nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) { @@ -909,6 +931,9 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) rqpair->destruct_channel = NULL; } + if (rqpair->poller && rqpair->poller->need_destroy && RB_EMPTY(&rqpair->poller->qpairs)) { + nvmf_rdma_poller_destroy(rqpair->poller); + } free(rqpair); } @@ -2682,6 +2707,8 @@ destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport, ibv_dealloc_pd(device->pd); } } + SPDK_NOTICELOG("IB device %s[%p] is destroyed.\n", ibv_get_device_name(device->context->device), + device); free(device); } @@ -2878,6 +2905,96 @@ nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, } } +static void _nvmf_rdma_remove_poller_in_group(void *c); + +static bool +nvmf_rdma_all_pollers_are_destroyed(void *c) +{ + struct poller_manage_ctx *ctx = c; + int counter; + + counter = __atomic_sub_fetch(ctx->inflight_op_counter, 1, __ATOMIC_SEQ_CST); + SPDK_DEBUGLOG(rdma, 
"nvmf_rdma_all_pollers_are_destroyed called. counter: %d, poller: %p\n", + counter, ctx->rpoller); + + if (counter == 0) { + free((void *)ctx->inflight_op_counter); + } + free(ctx); + + return counter == 0; +} + +static int +nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, + bool *has_inflight) +{ + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *rpoller; + struct spdk_nvmf_poll_group *poll_group; + struct poller_manage_ctx *ctx; + bool found; + int *inflight_counter; + spdk_msg_fn do_fn; + + *has_inflight = false; + do_fn = _nvmf_rdma_remove_poller_in_group; + inflight_counter = calloc(1, sizeof(int)); + if (!inflight_counter) { + SPDK_ERRLOG("Failed to allocate inflight counter when removing pollers\n"); + return -ENOMEM; + } + + TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) { + (*inflight_counter)++; + } + + TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) { + found = false; + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + if (rpoller->device == device) { + found = true; + break; + } + } + if (!found) { + __atomic_fetch_sub(inflight_counter, 1, __ATOMIC_SEQ_CST); + continue; + } + + ctx = calloc(1, sizeof(struct poller_manage_ctx)); + if (!ctx) { + SPDK_ERRLOG("Failed to allocate poller_manage_ctx when removing pollers\n"); + if (!*has_inflight) { + free(inflight_counter); + } + return -ENOMEM; + } + + ctx->rtransport = rtransport; + ctx->rgroup = rgroup; + ctx->rpoller = rpoller; + ctx->device = device; + ctx->thread = spdk_get_thread(); + ctx->inflight_op_counter = inflight_counter; + *has_inflight = true; + + poll_group = rgroup->group.group; + if (poll_group->thread != spdk_get_thread()) { + spdk_thread_send_msg(poll_group->thread, do_fn, ctx); + } else { + do_fn(ctx); + } + } + + if (!*has_inflight) { + free(inflight_counter); + } + + return 0; +} + static void nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport, struct 
spdk_nvmf_rdma_qpair *rqpair, bool drain) @@ -2954,6 +3071,12 @@ nvmf_rdma_destroy_drained_qpair(struct spdk_nvmf_rdma_qpair *rqpair) return; } + /* device is already destroyed and we should force destroy this qpair. */ + if (rqpair->poller && rqpair->poller->need_destroy) { + nvmf_rdma_qpair_destroy(rqpair); + return; + } + /* In non SRQ path, we will reach rqpair->max_queue_depth. In SRQ path, we will get the last_wqe event. */ if (rqpair->current_send_depth != 0) { return; @@ -3069,6 +3192,42 @@ nvmf_rdma_handle_cm_event_addr_change(struct spdk_nvmf_transport *transport, return event_acked; } +static void +nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device) +{ + struct spdk_nvmf_rdma_port *port, *port_tmp; + int rc; + bool has_inflight; + + rc = nvmf_rdma_remove_pollers_on_dev(rtransport, device, &has_inflight); + if (rc) { + SPDK_ERRLOG("Failed to handle device removal, rc %d\n", rc); + return; + } + + if (!has_inflight) { + /* no pollers, destroy the device */ + device->ready_to_destroy = true; + spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_remove_destroyed_device, rtransport); + } + + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) { + if (port->device == device) { + SPDK_NOTICELOG("Port %s:%s on device %s is being removed.\n", + port->trid->traddr, + port->trid->trsvcid, + ibv_get_device_name(port->device->context->device)); + + /* keep NVMF listener and only destroy structures of the + * RDMA transport. when the device comes back we can retry listening + * and the application's workflow will not be interrupted. 
+ */ + nvmf_rdma_stop_listen(&rtransport->transport, port->trid); + } + } +} + static void nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event) @@ -3079,14 +3238,11 @@ nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport, port = event->id->context; rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); - SPDK_NOTICELOG("Port %s:%s is being removed\n", port->trid->traddr, port->trid->trsvcid); - - nvmf_rdma_disconnect_qpairs_on_port(rtransport, port); - rdma_ack_cm_event(event); - while (spdk_nvmf_transport_stop_listen(transport, port->trid) == 0) { - ; + if (!port->device->need_destroy) { + port->device->need_destroy = true; + nvmf_rdma_handle_device_removal(rtransport, port->device); } } @@ -3159,15 +3315,11 @@ nvmf_process_cm_event(struct spdk_nvmf_transport *transport) * don't make attempts to call any ibv_query/modify/create functions. We can only call * ibv_destroy* functions to release user space memory allocated by IB. All kernel * resources are already cleaned. */ - if (event->id->qp) { + if (!event->id->qp) { /* If rdma_cm event has a valid `qp` pointer then the event refers to the - * corresponding qpair. Otherwise the event refers to a listening device */ - rc = nvmf_rdma_disconnect(event); - if (rc < 0) { - SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); - break; - } - } else { + * corresponding qpair. Otherwise the event refers to a listening device. + * Only handle this event on device because we will disconnect all qpairs + * when removing device */ nvmf_rdma_handle_cm_event_port_removal(transport, event); event_acked = true; } @@ -3323,8 +3475,12 @@ nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device) break; } break; - case IBV_EVENT_CQ_ERR: case IBV_EVENT_DEVICE_FATAL: + SPDK_ERRLOG("Device Fatal event[%s] received on %s. 
device: %p\n", + ibv_event_type_str(event.event_type), ibv_get_device_name(device->context->device), device); + device->need_destroy = true; + break; + case IBV_EVENT_CQ_ERR: case IBV_EVENT_PORT_ACTIVE: case IBV_EVENT_PORT_ERR: case IBV_EVENT_LID_CHANGE: @@ -3369,6 +3525,7 @@ nvmf_rdma_accept(void *ctx) struct spdk_nvmf_rdma_transport *rtransport; struct spdk_nvmf_rdma_device *device, *tmp; uint32_t count; + short revents; rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0); @@ -3389,8 +3546,17 @@ nvmf_rdma_accept(void *ctx) /* Second and subsequent poll descriptors are IB async events */ TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { - if (rtransport->poll_fds[i++].revents & POLLIN) { - nvmf_process_ib_events(device, 32); + revents = rtransport->poll_fds[i++].revents; + if (revents & POLLIN) { + if (spdk_likely(!device->need_destroy)) { + nvmf_process_ib_events(device, 32); + if (spdk_unlikely(device->need_destroy)) { + nvmf_rdma_handle_device_removal(rtransport, device); + } + } + nfds--; + } else if (revents & POLLNVAL || revents & POLLHUP) { + SPDK_ERRLOG("Receive unknown revent %x on device %p\n", (int)revents, device); nfds--; } } @@ -3632,6 +3798,9 @@ static void nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller) { struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair; + int rc; + + TAILQ_REMOVE(&poller->group->pollers, poller, link); RB_FOREACH_SAFE(qpair, qpairs_tree, &poller->qpairs, tmp_qpair) { nvmf_rdma_qpair_destroy(qpair); } @@ -3645,7 +3814,15 @@ nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller) } if (poller->cq) { - ibv_destroy_cq(poller->cq); + rc = ibv_destroy_cq(poller->cq); + if (rc != 0) { + SPDK_ERRLOG("Destroy cq return %d, error: %s\n", rc, strerror(errno)); + } + } + + if (poller->destroy_cb) { + poller->destroy_cb(poller->destroy_cb_ctx); + poller->destroy_cb = NULL; } free(poller); @@ -3664,7 +3841,6 @@ 
nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) } TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) { - TAILQ_REMOVE(&rgroup->pollers, poller, link); nvmf_rdma_poller_destroy(poller); } @@ -4055,12 +4231,23 @@ nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_wr *rdma_wr; struct spdk_nvmf_rdma_request *rdma_req; struct spdk_nvmf_rdma_recv *rdma_recv; - struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_qpair *rqpair, *tmp_rqpair; int reaped, i; int count = 0; bool error = false; uint64_t poll_tsc = spdk_get_ticks(); + if (spdk_unlikely(rpoller->need_destroy)) { + /* If qpair is closed before poller destroy, nvmf_rdma_destroy_drained_qpair may not + * be called because we cannot poll anything from cq. So we call that here to force + * destroy the qpair after to_close turning true. + */ + RB_FOREACH_SAFE(rqpair, qpairs_tree, &rpoller->qpairs, tmp_rqpair) { + nvmf_rdma_destroy_drained_qpair(rqpair); + } + return 0; + } + /* Poll for completing operations. 
*/ reaped = ibv_poll_cq(rpoller->cq, 32, wc); if (reaped < 0) { @@ -4203,19 +4390,74 @@ nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, return count; } +static void +_nvmf_rdma_remove_destroyed_device(void *c) +{ + struct spdk_nvmf_rdma_transport *rtransport = c; + struct spdk_nvmf_rdma_device *device, *device_tmp; + int rc; + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) { + if (device->ready_to_destroy) { + destroy_ib_device(rtransport, device); + } + } + + free_poll_fds(rtransport); + rc = generate_poll_fds(rtransport); + /* cannot handle fd allocation error here */ + if (rc != 0) { + SPDK_ERRLOG("Failed to generate poll fds after remove ib device.\n"); + } +} + +static void +_nvmf_rdma_remove_poller_in_group_cb(void *c) +{ + struct poller_manage_ctx *ctx = c; + struct spdk_nvmf_rdma_transport *rtransport = ctx->rtransport; + struct spdk_nvmf_rdma_device *device = ctx->device; + struct spdk_thread *thread = ctx->thread; + + if (nvmf_rdma_all_pollers_are_destroyed(c)) { + /* destroy device when last poller is destroyed */ + device->ready_to_destroy = true; + spdk_thread_send_msg(thread, _nvmf_rdma_remove_destroyed_device, rtransport); + } +} + +static void +_nvmf_rdma_remove_poller_in_group(void *c) +{ + struct spdk_nvmf_rdma_qpair *rqpair, *tmp_qpair; + struct poller_manage_ctx *ctx = c; + + ctx->rpoller->need_destroy = true; + ctx->rpoller->destroy_cb_ctx = ctx; + ctx->rpoller->destroy_cb = _nvmf_rdma_remove_poller_in_group_cb; + + if (RB_EMPTY(&ctx->rpoller->qpairs)) { + nvmf_rdma_poller_destroy(ctx->rpoller); + } else { + RB_FOREACH_SAFE(rqpair, qpairs_tree, &ctx->rpoller->qpairs, tmp_qpair) { + spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL); + } + } +} + static int nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) { struct spdk_nvmf_rdma_transport *rtransport; struct spdk_nvmf_rdma_poll_group *rgroup; - struct spdk_nvmf_rdma_poller *rpoller; + struct spdk_nvmf_rdma_poller *rpoller, *tmp; 
int count, rc; rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport); rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); count = 0; - TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + TAILQ_FOREACH_SAFE(rpoller, &rgroup->pollers, link, tmp) { rc = nvmf_rdma_poller_poll(rtransport, rpoller); if (rc < 0) { return rc; diff --git a/test/nvmf/nvmf.sh b/test/nvmf/nvmf.sh index f19844144..666f091c7 100755 --- a/test/nvmf/nvmf.sh +++ b/test/nvmf/nvmf.sh @@ -67,6 +67,8 @@ if [[ $NET_TYPE == phy ]]; then if ((${#TCP_INTERFACE_LIST[@]} > 0)); then run_test "nvmf_perf_adq" $rootdir/test/nvmf/target/perf_adq.sh "${TEST_ARGS[@]}" fi + else + run_test "nvmf_device_removal" test/nvmf/target/device_removal.sh "${TEST_ARGS[@]}" fi run_test "nvmf_shutdown" $rootdir/test/nvmf/target/shutdown.sh "${TEST_ARGS[@]}" # TODO: disabled due to intermittent failures. Need to triage. diff --git a/test/nvmf/target/device_removal.sh b/test/nvmf/target/device_removal.sh new file mode 100755 index 000000000..c32f0da4a --- /dev/null +++ b/test/nvmf/target/device_removal.sh @@ -0,0 +1,251 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (C) 2021 Intel Corporation +# All rights reserved. +# + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) 
+source $rootdir/test/setup/common.sh +source $rootdir/test/common/autotest_common.sh +source $rootdir/test/nvmf/common.sh + +nvmftestinit + +function get_subsystem_nqn() { + echo nqn.2016-06.io.spdk:system_$1 +} + +function create_subsystem_and_connect_on_netdev() { + local -a dev_name + + dev_name=$1 + malloc_name=$dev_name + nqn=$(get_subsystem_nqn "$dev_name") + ip=$(get_ip_address "$dev_name") + serial=SPDK000$dev_name + + MALLOC_BDEV_SIZE=128 + MALLOC_BLOCK_SIZE=512 + + $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b $malloc_name + $rpc_py nvmf_create_subsystem $nqn -a -s $serial + $rpc_py nvmf_subsystem_add_ns $nqn $malloc_name + $rpc_py nvmf_subsystem_add_listener $nqn -t $TEST_TRANSPORT -a $ip -s $NVMF_PORT + + if ! nvme connect -t $TEST_TRANSPORT -n $nqn -a $ip -s $NVMF_PORT; then + exit 1 + fi + + waitforserial "$serial" + nvme_name=$(lsblk -l -o NAME,SERIAL | grep -oP "([\w]*)(?=\s+${serial})") + nvme_size=$(sec_size_to_bytes $nvme_name) + + echo "${nvme_name}" + return 0 +} + +function create_subsystem_and_connect() { + local -gA netdev_nvme_dict + netdev_nvme_dict=() + + $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 "$@" + for net_dev in $(get_rdma_if_list); do + netdev_nvme_dict[$net_dev]="$(create_subsystem_and_connect_on_netdev $net_dev)" + done + + return 0 +} + +function rescan_pci() { + echo 1 > /sys/bus/pci/rescan +} + +function get_pci_dir() { + dev_name=$1 + readlink -f /sys/bus/pci/devices/*/net/${dev_name}/device +} + +function remove_one_nic() { + dev_name=$1 + echo 1 > $(get_pci_dir $dev_name)/remove +} + +function get_rdma_device_name() { + dev_name=$1 + ls $(get_pci_dir $dev_name)/infiniband +} + +function test_remove_and_rescan() { + nvmfappstart -m 0xF + + create_subsystem_and_connect "$@" + + for net_dev in "${!netdev_nvme_dict[@]}"; do + $rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 40 & + fio_pid=$! 
+ sleep 3 + + nvme_dev=${netdev_nvme_dict[$net_dev]} + rdma_dev_name=$(get_rdma_device_name $net_dev) + origin_ip=$(get_ip_address "$net_dev") + pci_dir=$(get_pci_dir $net_dev) + + if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then + echo "Device $rdma_dev_name is not registered in tgt". + exit 1 + fi + + remove_one_nic $net_dev + + for i in $(seq 1 10); do + if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then + break + fi + if [[ $i == 10 ]]; then + # failed to remove this device + exit 1 + fi + sleep 1 + done + + rescan_pci + + for i in $(seq 1 10); do + new_net_dev=$(ls ${pci_dir}/net || echo) + if [[ -z $new_net_dev ]]; then + sleep 1 + elif [[ $new_net_dev != "$net_dev" ]]; then + echo "Device name changed after rescan, try rename." + ip link set $new_net_dev down && ip link set $new_net_dev name $net_dev + sleep 1 + else + break + fi + done + + if [[ -z $new_net_dev ]]; then + exit 1 + fi + + ip link set $net_dev up + if [[ -z $(get_ip_address "$net_dev") ]]; then + ip addr add $origin_ip/24 dev $net_dev + fi + done + + killprocess $nvmfpid + nvmfpid= + + return 0 +} + +function check_env_for_test_bonding_slaves() { + # only test with dual-port CX4/CX5. + + local -gA port_nic_map + local -g target_nics + + # gather dev with same bus-device. + for bdf in "${mlx[@]}"; do + pci_net_devs=("/sys/bus/pci/devices/$bdf/net/"*) + pci_net_devs=("${pci_net_devs[@]##*/}") + + bd=$(echo ${bdf} | cut -d '.' 
-f 1) + + port_nic_map[$bd]="${pci_net_devs[*]} ${port_nic_map[$bd]}" + done + + for x in "${port_nic_map[@]}"; do + ports=($x) + if ((${#ports[@]} >= 2)); then + target_nics=(${ports[@]}) + return 0 + fi + done + + return 1 +} + +BOND_NAME="bond_nvmf" +BOND_IP="10.11.11.26" +BOND_MASK="24" + +function clean_bond_device() { + if ip link | grep $BOND_NAME; then + ip link del $BOND_NAME + fi + for net_dev in "${target_nics[@]}"; do + ip link set $net_dev up + done +} + +function test_bonding_slaves_on_nics() { + nic1=$1 + nic2=$2 + + clean_bond_device + ip link add $BOND_NAME type bond mode 1 + ip link set $nic1 down && sudo ip link set $nic1 master $BOND_NAME + ip link set $nic2 down && sudo ip link set $nic2 master $BOND_NAME + ip link set $BOND_NAME up + ip addr add ${BOND_IP}/${BOND_MASK} dev $BOND_NAME + + # check slaves here + slaves=($(cat /sys/class/net/${BOND_NAME}/bonding/slaves)) + if ((${#slaves[@]} != 2)); then + exit 1 + fi + + # wait ib driver activated on bond device + sleep 5 + + nvmfappstart -m 0xF + $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 + + create_subsystem_and_connect_on_netdev $BOND_NAME + + ib_count=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name) + echo "IB Count: " $ib_count + + $rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 10 & + fio_pid=$! + + sleep 2 + echo -$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves + + ib_count2=$ib_count + for i in $(seq 1 10); do + ib_count2=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name) + if ((ib_count2 < ib_count)); then + break + fi + sleep 2 + done + if ((ib_count2 == ib_count)); then + exit 1 + fi + + # fio will exit when nvmf fin. do not wait here because it may be in D state. + killprocess $nvmfpid + nvmfpid= + return 0 +} + +function test_bond_slaves() { + check_env_for_test_bonding_slaves + if [[ -z "$target_nics" ]]; then + echo "No available nic ports to run this test." 
+ exit 0 + fi + test_bonding_slaves_on_nics "${target_nics[@]}" +} + +run_test "nvmf_device_removal_pci_remove_no_srq" test_remove_and_rescan --no-srq +run_test "nvmf_device_removal_pci_remove" test_remove_and_rescan +# bond slaves case needs lag_master & vport_manager are enabled by mlxconfig +# and not work on CI machine currently. +# run_test "nvmf_device_removal_bond_slaves" test_bond_slaves + +nvmftestfini +clean_bond_device