nvmf/rdma: Destroy all related resources after IB device removed
When IBV_EVENT_DEVICE_FATAL or RDMA_CM_EVENT_DEVICE_REMOVAL occurs, destroy all userspace resources such as the qpairs, pollers and ibv_context.

Signed-off-by: sijie.sun <sijie.sun@smartx.com>
Change-Id: Ie4832e4804eb572d6ec3bdc44fb7f9339f443d7e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15615
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
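For background, the detection path added here relies on the libibverbs asynchronous event channel: the transport polls each device's async fd, reads events with ibv_get_async_event(), and treats IBV_EVENT_DEVICE_FATAL as the signal to tear the device down. The fragment below is a minimal sketch of that pattern only; struct device_ctx and drain_ib_async_events() are hypothetical names for illustration and are not part of this patch, which implements the equivalent logic in nvmf_process_ib_event() and nvmf_rdma_accept().

    #include <stdbool.h>
    #include <stdio.h>
    #include <infiniband/verbs.h>

    /* Hypothetical per-device bookkeeping, mirroring the need_destroy flag
     * this commit adds to struct spdk_nvmf_rdma_device. */
    struct device_ctx {
        struct ibv_context *verbs;
        bool need_destroy;
    };

    /* Drain pending async events from one device and flag it for teardown
     * when a fatal event is seen. Returns true if the device must be destroyed. */
    static bool
    drain_ib_async_events(struct device_ctx *dev)
    {
        struct ibv_async_event event;

        /* ibv_get_async_event() blocks unless the async fd was made non-blocking,
         * so a real caller would first poll() dev->verbs->async_fd for POLLIN. */
        while (ibv_get_async_event(dev->verbs, &event) == 0) {
            if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
                fprintf(stderr, "fatal event on %s, scheduling teardown\n",
                        ibv_get_device_name(dev->verbs->device));
                dev->need_destroy = true;
            }
            /* Every event must be acknowledged before resources are destroyed. */
            ibv_ack_async_event(&event);
            if (dev->need_destroy) {
                break;
            }
        }

        return dev->need_destroy;
    }

In the patch itself, the flag set here corresponds to device->need_destroy, and nvmf_rdma_accept() reacts to it by calling nvmf_rdma_handle_device_removal().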
parent: 07be7ca0ad
commit: 8ddc5cd4a7

lib/nvmf/rdma.c (286 lines changed)
@@ -283,6 +283,8 @@ struct spdk_nvmf_rdma_resources {
typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair);

typedef void (*spdk_poller_destroy_cb)(void *ctx);

struct spdk_nvmf_rdma_ibv_event_ctx {
    struct spdk_nvmf_rdma_qpair *rqpair;
    spdk_nvmf_rdma_qpair_ibv_event cb_fn;

@@ -395,6 +397,7 @@ struct spdk_nvmf_rdma_poller {
    /* The maximum number of I/O outstanding on the shared receive queue at one time */
    uint16_t max_srq_depth;
    bool need_destroy;

    /* Shared receive queue */
    struct spdk_rdma_srq *srq;

@@ -402,6 +405,9 @@ struct spdk_nvmf_rdma_poller {
    struct spdk_nvmf_rdma_resources *resources;
    struct spdk_nvmf_rdma_poller_stat stat;

    spdk_poller_destroy_cb destroy_cb;
    void *destroy_cb_ctx;

    RB_HEAD(qpairs_tree, spdk_nvmf_rdma_qpair) qpairs;
    STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_recv;

@@ -436,6 +442,8 @@ struct spdk_nvmf_rdma_device {
    struct ibv_pd *pd;

    int num_srq;
    bool need_destroy;
    bool ready_to_destroy;

    TAILQ_ENTRY(spdk_nvmf_rdma_device) link;
};

@@ -476,6 +484,16 @@ struct spdk_nvmf_rdma_transport {
    TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups;
};

struct poller_manage_ctx {
    struct spdk_nvmf_rdma_transport *rtransport;
    struct spdk_nvmf_rdma_poll_group *rgroup;
    struct spdk_nvmf_rdma_poller *rpoller;
    struct spdk_nvmf_rdma_device *device;

    struct spdk_thread *thread;
    volatile int *inflight_op_counter;
};

static const struct spdk_json_object_decoder rdma_transport_opts_decoder[] = {
    {
        "num_cqe", offsetof(struct rdma_transport_opts, num_cqe),

@@ -516,6 +534,8 @@ static void _poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
static void _poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
        struct spdk_nvmf_rdma_poller *rpoller);

static void _nvmf_rdma_remove_destroyed_device(void *c);

static inline int
nvmf_rdma_check_ibv_state(enum ibv_qp_state state)
{

@@ -831,6 +851,8 @@ nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair)
    }
}

static void nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller);

static void
nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
{

@@ -909,6 +931,9 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
        rqpair->destruct_channel = NULL;
    }

    if (rqpair->poller && rqpair->poller->need_destroy && RB_EMPTY(&rqpair->poller->qpairs)) {
        nvmf_rdma_poller_destroy(rqpair->poller);
    }

    free(rqpair);
}

@@ -2682,6 +2707,8 @@ destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
            ibv_dealloc_pd(device->pd);
        }
    }

    SPDK_NOTICELOG("IB device %s[%p] is destroyed.\n", ibv_get_device_name(device->context->device),
                   device);
    free(device);
}

@@ -2878,6 +2905,96 @@ nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
    }
}

static void _nvmf_rdma_remove_poller_in_group(void *c);

static bool
nvmf_rdma_all_pollers_are_destroyed(void *c)
{
    struct poller_manage_ctx *ctx = c;
    int counter;

    counter = __atomic_sub_fetch(ctx->inflight_op_counter, 1, __ATOMIC_SEQ_CST);
    SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_are_destroyed called. counter: %d, poller: %p\n",
                  counter, ctx->rpoller);

    if (counter == 0) {
        free((void *)ctx->inflight_op_counter);
    }
    free(ctx);

    return counter == 0;
}

static int
nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport,
        struct spdk_nvmf_rdma_device *device,
        bool *has_inflight)
{
    struct spdk_nvmf_rdma_poll_group *rgroup;
    struct spdk_nvmf_rdma_poller *rpoller;
    struct spdk_nvmf_poll_group *poll_group;
    struct poller_manage_ctx *ctx;
    bool found;
    int *inflight_counter;
    spdk_msg_fn do_fn;

    *has_inflight = false;
    do_fn = _nvmf_rdma_remove_poller_in_group;
    inflight_counter = calloc(1, sizeof(int));
    if (!inflight_counter) {
        SPDK_ERRLOG("Failed to allocate inflight counter when removing pollers\n");
        return -ENOMEM;
    }

    TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
        (*inflight_counter)++;
    }

    TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
        found = false;
        TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
            if (rpoller->device == device) {
                found = true;
                break;
            }
        }
        if (!found) {
            __atomic_fetch_sub(inflight_counter, 1, __ATOMIC_SEQ_CST);
            continue;
        }

        ctx = calloc(1, sizeof(struct poller_manage_ctx));
        if (!ctx) {
            SPDK_ERRLOG("Failed to allocate poller_manage_ctx when removing pollers\n");
            if (!*has_inflight) {
                free(inflight_counter);
            }
            return -ENOMEM;
        }

        ctx->rtransport = rtransport;
        ctx->rgroup = rgroup;
        ctx->rpoller = rpoller;
        ctx->device = device;
        ctx->thread = spdk_get_thread();
        ctx->inflight_op_counter = inflight_counter;
        *has_inflight = true;

        poll_group = rgroup->group.group;
        if (poll_group->thread != spdk_get_thread()) {
            spdk_thread_send_msg(poll_group->thread, do_fn, ctx);
        } else {
            do_fn(ctx);
        }
    }

    if (!*has_inflight) {
        free(inflight_counter);
    }

    return 0;
}

static void
nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
        struct spdk_nvmf_rdma_qpair *rqpair, bool drain)

@@ -2954,6 +3071,12 @@ nvmf_rdma_destroy_drained_qpair(struct spdk_nvmf_rdma_qpair *rqpair)
        return;
    }

    /* device is already destroyed and we should force destroy this qpair. */
    if (rqpair->poller && rqpair->poller->need_destroy) {
        nvmf_rdma_qpair_destroy(rqpair);
        return;
    }

    /* In non SRQ path, we will reach rqpair->max_queue_depth. In SRQ path, we will get the last_wqe event. */
    if (rqpair->current_send_depth != 0) {
        return;

@@ -3069,6 +3192,42 @@ nvmf_rdma_handle_cm_event_addr_change(struct spdk_nvmf_transport *transport,
    return event_acked;
}

static void
nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
        struct spdk_nvmf_rdma_device *device)
{
    struct spdk_nvmf_rdma_port *port, *port_tmp;
    int rc;
    bool has_inflight;

    rc = nvmf_rdma_remove_pollers_on_dev(rtransport, device, &has_inflight);
    if (rc) {
        SPDK_ERRLOG("Failed to handle device removal, rc %d\n", rc);
        return;
    }

    if (!has_inflight) {
        /* no pollers, destroy the device */
        device->ready_to_destroy = true;
        spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_remove_destroyed_device, rtransport);
    }

    TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) {
        if (port->device == device) {
            SPDK_NOTICELOG("Port %s:%s on device %s is being removed.\n",
                           port->trid->traddr,
                           port->trid->trsvcid,
                           ibv_get_device_name(port->device->context->device));

            /* keep NVMF listener and only destroy structures of the
             * RDMA transport. when the device comes back we can retry listening
             * and the application's workflow will not be interrupted.
             */
            nvmf_rdma_stop_listen(&rtransport->transport, port->trid);
        }
    }
}

static void
nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,
        struct rdma_cm_event *event)

@@ -3079,14 +3238,11 @@ nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,
    port = event->id->context;
    rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);

    SPDK_NOTICELOG("Port %s:%s is being removed\n", port->trid->traddr, port->trid->trsvcid);

    nvmf_rdma_disconnect_qpairs_on_port(rtransport, port);

    rdma_ack_cm_event(event);

    while (spdk_nvmf_transport_stop_listen(transport, port->trid) == 0) {
        ;
    if (!port->device->need_destroy) {
        port->device->need_destroy = true;
        nvmf_rdma_handle_device_removal(rtransport, port->device);
    }
    }

@@ -3159,15 +3315,11 @@ nvmf_process_cm_event(struct spdk_nvmf_transport *transport)
             * don't make attempts to call any ibv_query/modify/create functions. We can only call
             * ibv_destroy* functions to release user space memory allocated by IB. All kernel
             * resources are already cleaned. */
            if (event->id->qp) {
            if (!event->id->qp) {
                /* If rdma_cm event has a valid `qp` pointer then the event refers to the
                 * corresponding qpair. Otherwise the event refers to a listening device */
                rc = nvmf_rdma_disconnect(event);
                if (rc < 0) {
                    SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
                    break;
                }
            } else {
                 * corresponding qpair. Otherwise the event refers to a listening device.
                 * Only handle this event on device because we will disconnect all qpairs
                 * when removing device */
                nvmf_rdma_handle_cm_event_port_removal(transport, event);
                event_acked = true;
            }

@@ -3323,8 +3475,12 @@ nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
            break;
        }
        break;
    case IBV_EVENT_CQ_ERR:
    case IBV_EVENT_DEVICE_FATAL:
        SPDK_ERRLOG("Device Fatal event[%s] received on %s. device: %p\n",
                    ibv_event_type_str(event.event_type), ibv_get_device_name(device->context->device), device);
        device->need_destroy = true;
        break;
    case IBV_EVENT_CQ_ERR:
    case IBV_EVENT_PORT_ACTIVE:
    case IBV_EVENT_PORT_ERR:
    case IBV_EVENT_LID_CHANGE:

@@ -3369,6 +3525,7 @@ nvmf_rdma_accept(void *ctx)
    struct spdk_nvmf_rdma_transport *rtransport;
    struct spdk_nvmf_rdma_device *device, *tmp;
    uint32_t count;
    short revents;

    rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
    count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);

@@ -3389,8 +3546,17 @@ nvmf_rdma_accept(void *ctx)

    /* Second and subsequent poll descriptors are IB async events */
    TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
        if (rtransport->poll_fds[i++].revents & POLLIN) {
            nvmf_process_ib_events(device, 32);
        revents = rtransport->poll_fds[i++].revents;
        if (revents & POLLIN) {
            if (spdk_likely(!device->need_destroy)) {
                nvmf_process_ib_events(device, 32);
                if (spdk_unlikely(device->need_destroy)) {
                    nvmf_rdma_handle_device_removal(rtransport, device);
                }
            }
            nfds--;
        } else if (revents & POLLNVAL || revents & POLLHUP) {
            SPDK_ERRLOG("Receive unknown revent %x on device %p\n", (int)revents, device);
            nfds--;
        }
    }

@@ -3632,6 +3798,9 @@ static void
nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller)
{
    struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair;
    int rc;

    TAILQ_REMOVE(&poller->group->pollers, poller, link);
    RB_FOREACH_SAFE(qpair, qpairs_tree, &poller->qpairs, tmp_qpair) {
        nvmf_rdma_qpair_destroy(qpair);
    }

@@ -3645,7 +3814,15 @@ nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller)
    }

    if (poller->cq) {
        ibv_destroy_cq(poller->cq);
        rc = ibv_destroy_cq(poller->cq);
        if (rc != 0) {
            SPDK_ERRLOG("Destroy cq return %d, error: %s\n", rc, strerror(errno));
        }
    }

    if (poller->destroy_cb) {
        poller->destroy_cb(poller->destroy_cb_ctx);
        poller->destroy_cb = NULL;
    }

    free(poller);

@@ -3664,7 +3841,6 @@ nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
    }

    TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
        TAILQ_REMOVE(&rgroup->pollers, poller, link);
        nvmf_rdma_poller_destroy(poller);
    }

@@ -4055,12 +4231,23 @@ nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
    struct spdk_nvmf_rdma_wr *rdma_wr;
    struct spdk_nvmf_rdma_request *rdma_req;
    struct spdk_nvmf_rdma_recv *rdma_recv;
    struct spdk_nvmf_rdma_qpair *rqpair;
    struct spdk_nvmf_rdma_qpair *rqpair, *tmp_rqpair;
    int reaped, i;
    int count = 0;
    bool error = false;
    uint64_t poll_tsc = spdk_get_ticks();

    if (spdk_unlikely(rpoller->need_destroy)) {
        /* If qpair is closed before poller destroy, nvmf_rdma_destroy_drained_qpair may not
         * be called because we cannot poll anything from cq. So we call that here to force
         * destroy the qpair after to_close turning true.
         */
        RB_FOREACH_SAFE(rqpair, qpairs_tree, &rpoller->qpairs, tmp_rqpair) {
            nvmf_rdma_destroy_drained_qpair(rqpair);
        }
        return 0;
    }

    /* Poll for completing operations. */
    reaped = ibv_poll_cq(rpoller->cq, 32, wc);
    if (reaped < 0) {

@@ -4203,19 +4390,74 @@ nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
    return count;
}

static void
_nvmf_rdma_remove_destroyed_device(void *c)
{
    struct spdk_nvmf_rdma_transport *rtransport = c;
    struct spdk_nvmf_rdma_device *device, *device_tmp;
    int rc;

    TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
        if (device->ready_to_destroy) {
            destroy_ib_device(rtransport, device);
        }
    }

    free_poll_fds(rtransport);
    rc = generate_poll_fds(rtransport);
    /* cannot handle fd allocation error here */
    if (rc != 0) {
        SPDK_ERRLOG("Failed to generate poll fds after remove ib device.\n");
    }
}

static void
_nvmf_rdma_remove_poller_in_group_cb(void *c)
{
    struct poller_manage_ctx *ctx = c;
    struct spdk_nvmf_rdma_transport *rtransport = ctx->rtransport;
    struct spdk_nvmf_rdma_device *device = ctx->device;
    struct spdk_thread *thread = ctx->thread;

    if (nvmf_rdma_all_pollers_are_destroyed(c)) {
        /* destroy device when last poller is destroyed */
        device->ready_to_destroy = true;
        spdk_thread_send_msg(thread, _nvmf_rdma_remove_destroyed_device, rtransport);
    }
}

static void
_nvmf_rdma_remove_poller_in_group(void *c)
{
    struct spdk_nvmf_rdma_qpair *rqpair, *tmp_qpair;
    struct poller_manage_ctx *ctx = c;

    ctx->rpoller->need_destroy = true;
    ctx->rpoller->destroy_cb_ctx = ctx;
    ctx->rpoller->destroy_cb = _nvmf_rdma_remove_poller_in_group_cb;

    if (RB_EMPTY(&ctx->rpoller->qpairs)) {
        nvmf_rdma_poller_destroy(ctx->rpoller);
    } else {
        RB_FOREACH_SAFE(rqpair, qpairs_tree, &ctx->rpoller->qpairs, tmp_qpair) {
            spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
        }
    }
}

static int
nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
    struct spdk_nvmf_rdma_transport *rtransport;
    struct spdk_nvmf_rdma_poll_group *rgroup;
    struct spdk_nvmf_rdma_poller *rpoller;
    struct spdk_nvmf_rdma_poller *rpoller, *tmp;
    int count, rc;

    rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport);
    rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);

    count = 0;
    TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
    TAILQ_FOREACH_SAFE(rpoller, &rgroup->pollers, link, tmp) {
        rc = nvmf_rdma_poller_poll(rtransport, rpoller);
        if (rc < 0) {
            return rc;
@@ -67,6 +67,8 @@ if [[ $NET_TYPE == phy ]]; then
        if ((${#TCP_INTERFACE_LIST[@]} > 0)); then
            run_test "nvmf_perf_adq" $rootdir/test/nvmf/target/perf_adq.sh "${TEST_ARGS[@]}"
        fi
    else
        run_test "nvmf_device_removal" test/nvmf/target/device_removal.sh "${TEST_ARGS[@]}"
    fi
    run_test "nvmf_shutdown" $rootdir/test/nvmf/target/shutdown.sh "${TEST_ARGS[@]}"
    # TODO: disabled due to intermittent failures. Need to triage.
test/nvmf/target/device_removal.sh (new executable file, 251 lines)
@@ -0,0 +1,251 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2021 Intel Corporation
# All rights reserved.
#

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/setup/common.sh
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/nvmf/common.sh

nvmftestinit

function get_subsystem_nqn() {
    echo nqn.2016-06.io.spdk:system_$1
}

function create_subsystem_and_connect_on_netdev() {
    local -a dev_name

    dev_name=$1
    malloc_name=$dev_name
    nqn=$(get_subsystem_nqn "$dev_name")
    ip=$(get_ip_address "$dev_name")
    serial=SPDK000$dev_name

    MALLOC_BDEV_SIZE=128
    MALLOC_BLOCK_SIZE=512

    $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b $malloc_name
    $rpc_py nvmf_create_subsystem $nqn -a -s $serial
    $rpc_py nvmf_subsystem_add_ns $nqn $malloc_name
    $rpc_py nvmf_subsystem_add_listener $nqn -t $TEST_TRANSPORT -a $ip -s $NVMF_PORT

    if ! nvme connect -t $TEST_TRANSPORT -n $nqn -a $ip -s $NVMF_PORT; then
        exit 1
    fi

    waitforserial "$serial"
    nvme_name=$(lsblk -l -o NAME,SERIAL | grep -oP "([\w]*)(?=\s+${serial})")
    nvme_size=$(sec_size_to_bytes $nvme_name)

    echo "${nvme_name}"
    return 0
}

function create_subsystem_and_connect() {
    local -gA netdev_nvme_dict
    netdev_nvme_dict=()

    $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 "$@"
    for net_dev in $(get_rdma_if_list); do
        netdev_nvme_dict[$net_dev]="$(create_subsystem_and_connect_on_netdev $net_dev)"
    done

    return 0
}

function rescan_pci() {
    echo 1 > /sys/bus/pci/rescan
}

function get_pci_dir() {
    dev_name=$1
    readlink -f /sys/bus/pci/devices/*/net/${dev_name}/device
}

function remove_one_nic() {
    dev_name=$1
    echo 1 > $(get_pci_dir $dev_name)/remove
}

function get_rdma_device_name() {
    dev_name=$1
    ls $(get_pci_dir $dev_name)/infiniband
}

function test_remove_and_rescan() {
    nvmfappstart -m 0xF

    create_subsystem_and_connect "$@"

    for net_dev in "${!netdev_nvme_dict[@]}"; do
        $rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 40 &
        fio_pid=$!
        sleep 3

        nvme_dev=${netdev_nvme_dict[$net_dev]}
        rdma_dev_name=$(get_rdma_device_name $net_dev)
        origin_ip=$(get_ip_address "$net_dev")
        pci_dir=$(get_pci_dir $net_dev)

        if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then
            echo "Device $rdma_dev_name is not registered in tgt".
            exit 1
        fi

        remove_one_nic $net_dev

        for i in $(seq 1 10); do
            if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then
                break
            fi
            if [[ $i == 10 ]]; then
                # failed to remove this device
                exit 1
            fi
            sleep 1
        done

        rescan_pci

        for i in $(seq 1 10); do
            new_net_dev=$(ls ${pci_dir}/net || echo)
            if [[ -z $new_net_dev ]]; then
                sleep 1
            elif [[ $new_net_dev != "$net_dev" ]]; then
                echo "Device name changed after rescan, try rename."
                ip link set $new_net_dev down && ip link set $new_net_dev name $net_dev
                sleep 1
            else
                break
            fi
        done

        if [[ -z $new_net_dev ]]; then
            exit 1
        fi

        ip link set $net_dev up
        if [[ -z $(get_ip_address "$net_dev") ]]; then
            ip addr add $origin_ip/24 dev $net_dev
        fi
    done

    killprocess $nvmfpid
    nvmfpid=

    return 0
}

function check_env_for_test_bonding_slaves() {
    # only test with dual-port CX4/CX5.

    local -gA port_nic_map
    local -g target_nics

    # gather dev with same bus-device.
    for bdf in "${mlx[@]}"; do
        pci_net_devs=("/sys/bus/pci/devices/$bdf/net/"*)
        pci_net_devs=("${pci_net_devs[@]##*/}")

        bd=$(echo ${bdf} | cut -d '.' -f 1)

        port_nic_map[$bd]="${pci_net_devs[*]} ${port_nic_map[$bd]}"
    done

    for x in "${port_nic_map[@]}"; do
        ports=($x)
        if ((${#ports[@]} >= 2)); then
            target_nics=(${ports[@]})
            return 0
        fi
    done

    return 1
}

BOND_NAME="bond_nvmf"
BOND_IP="10.11.11.26"
BOND_MASK="24"

function clean_bond_device() {
    if ip link | grep $BOND_NAME; then
        ip link del $BOND_NAME
    fi
    for net_dev in "${target_nics[@]}"; do
        ip link set $net_dev up
    done
}

function test_bonding_slaves_on_nics() {
    nic1=$1
    nic2=$2

    clean_bond_device
    ip link add $BOND_NAME type bond mode 1
    ip link set $nic1 down && sudo ip link set $nic1 master $BOND_NAME
    ip link set $nic2 down && sudo ip link set $nic2 master $BOND_NAME
    ip link set $BOND_NAME up
    ip addr add ${BOND_IP}/${BOND_MASK} dev $BOND_NAME

    # check slaves here
    slaves=($(cat /sys/class/net/${BOND_NAME}/bonding/slaves))
    if ((${#slaves[@]} != 2)); then
        exit 1
    fi

    # wait ib driver activated on bond device
    sleep 5

    nvmfappstart -m 0xF
    $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192

    create_subsystem_and_connect_on_netdev $BOND_NAME

    ib_count=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name)
    echo "IB Count: " $ib_count

    $rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 10 &
    fio_pid=$!

    sleep 2
    echo -$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves

    ib_count2=$ib_count
    for i in $(seq 1 10); do
        ib_count2=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name)
        if ((ib_count2 < ib_count)); then
            break
        fi
        sleep 2
    done
    if ((ib_count2 == ib_count)); then
        exit 1
    fi

    # fio will exit when nvmf fin. do not wait here because it may be in D state.
    killprocess $nvmfpid
    nvmfpid=

    return 0
}

function test_bond_slaves() {
    check_env_for_test_bonding_slaves
    if [[ -z "$target_nics" ]]; then
        echo "No available nic ports to run this test."
        exit 0
    fi
    test_bonding_slaves_on_nics "${target_nics[@]}"
}

run_test "nvmf_device_removal_pci_remove_no_srq" test_remove_and_rescan --no-srq
run_test "nvmf_device_removal_pci_remove" test_remove_and_rescan
# bond slaves case needs lag_master & vport_manager are enabled by mlxconfig
# and not work on CI machine currently.
# run_test "nvmf_device_removal_bond_slaves" test_bond_slaves

nvmftestfini
clean_bond_device