test/nvmf: add a test case for trid failover.

Modifying the target_disconnect.sh test to include an example of
transport_id failover for an NVMe-oF controller.

Change-Id: I746ed737ab56c7dec6ee99e840c631ba46ee359e
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/472230
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Seth Howell 2019-10-23 15:58:47 -07:00 committed by Jim Harris
parent a072421c90
commit eb2dee2444
4 changed files with 87 additions and 11 deletions

View File

@ -45,6 +45,7 @@
struct ctrlr_entry {
struct spdk_nvme_ctrlr *ctrlr;
struct spdk_nvme_transport_id failover_trid;
enum spdk_nvme_transport_type trtype;
struct ctrlr_entry *next;
char name[1024];
@ -117,6 +118,7 @@ static const char *g_core_mask;
struct trid_entry {
struct spdk_nvme_transport_id trid;
struct spdk_nvme_transport_id failover_trid;
TAILQ_ENTRY(trid_entry) tailq;
};
@ -361,6 +363,7 @@ register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
{
struct spdk_nvme_ns *ns;
struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry));
const struct spdk_nvme_transport_id *ctrlr_trid;
uint32_t nsid;
if (entry == NULL) {
@ -368,6 +371,22 @@ register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
exit(1);
}
ctrlr_trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
assert(ctrlr_trid != NULL);
/* each controller needs a unique failover trid. */
entry->failover_trid = trid_entry->failover_trid;
/*
* Users are allowed to leave the trid subnqn blank or specify a discovery controller subnqn.
* In those cases, the controller subnqn will not equal the trid_entry subnqn and, by association,
* the failover_trid subnqn.
* When we do failover, we want to reconnect to the same nqn so explicitly set the failover nqn to
* the ctrlr nqn here.
*/
snprintf(entry->failover_trid.subnqn, SPDK_NVMF_NQN_MAX_LEN + 1, "%s", ctrlr_trid->subnqn);
build_nvme_name(entry->name, sizeof(entry->name), ctrlr);
entry->ctrlr = ctrlr;
@ -583,6 +602,7 @@ static void usage(char *program_name)
printf("\t traddr Transport address (e.g. 192.168.100.8 for RDMA)\n");
printf("\t trsvcid Transport service identifier (e.g. 4420)\n");
printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
printf("\t alt_traddr (Optional) Alternative Transport address for failover.\n");
printf("\t Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n");
printf("\t[-k keep alive timeout period in millisecond]\n");
printf("\t[-s DPDK huge memory size in MB.]\n");
@ -614,6 +634,8 @@ add_trid(const char *trid_str)
{
struct trid_entry *trid_entry;
struct spdk_nvme_transport_id *trid;
char *alt_traddr;
int len;
trid_entry = calloc(1, sizeof(*trid_entry));
if (trid_entry == NULL) {
@ -629,6 +651,19 @@ add_trid(const char *trid_str)
return 1;
}
trid_entry->failover_trid = trid_entry->trid;
alt_traddr = strcasestr(trid_str, "alt_traddr:");
if (alt_traddr) {
alt_traddr += strlen("alt_traddr:");
len = strcspn(alt_traddr, " \t\n");
if (len > SPDK_NVMF_TRADDR_MAX_LEN) {
fprintf(stderr, "The failover traddr %s is too long.\n", alt_traddr);
return -1;
}
snprintf(trid_entry->failover_trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, "%s", alt_traddr);
}
TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq);
return 0;
}
@ -975,9 +1010,11 @@ associate_workers_with_ns(void)
static void *
nvme_poll_ctrlrs(void *arg)
{
struct ctrlr_entry *entry;
int oldstate;
int rc;
struct ctrlr_entry *entry;
const struct spdk_nvme_transport_id *old_trid;
int oldstate;
int rc;
spdk_unaffinitize_thread();
@ -989,7 +1026,19 @@ nvme_poll_ctrlrs(void *arg)
rc = spdk_nvme_ctrlr_process_admin_completions(entry->ctrlr);
/* This controller has encountered a failure at the transport level. reset it. */
if (rc == -ENXIO) {
fprintf(stderr, "A controller has encountered a failure and is being reset.\n");
if (entry->num_resets == 0) {
old_trid = spdk_nvme_ctrlr_get_transport_id(entry->ctrlr);
fprintf(stderr, "A controller has encountered a failure and is being reset.\n");
if (spdk_nvme_transport_id_compare(old_trid, &entry->failover_trid)) {
fprintf(stderr, "Resorting to new failover address %s\n", entry->failover_trid.traddr);
spdk_nvme_ctrlr_fail(entry->ctrlr);
rc = spdk_nvme_ctrlr_set_trid(entry->ctrlr, &entry->failover_trid);
if (rc != 0) {
fprintf(stderr, "Unable to fail over to back up trid.\n");
}
}
}
rc = spdk_nvme_ctrlr_reset(entry->ctrlr);
if (rc != 0) {
entry->num_resets++;

View File

@ -886,6 +886,12 @@ spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *st
* it as an invalid key.
*/
continue;
} else if (strcasecmp(key, "alt_traddr") == 0) {
/*
* Used by applications for enabling transport ID failover.
* Please see the case above for more information on custom parameters.
*/
continue;
} else {
SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
}

View File

@ -183,6 +183,7 @@ function nvmftestinit()
if [ "$TEST_TRANSPORT" == "rdma" ]; then
RDMA_IP_LIST=$(get_available_rdma_ips)
NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
if [ -z $NVMF_FIRST_TARGET_IP ]; then
echo "no NIC for nvmf test"
exit 0

View File

@ -22,28 +22,48 @@ function disconnect_init()
$rpc_py nvmf_subsystem_create nqn.2016-06.io.spdk:cnode1 -a -s SPDK00000000000001
$rpc_py nvmf_subsystem_add_ns nqn.2016-06.io.spdk:cnode1 Malloc0
$rpc_py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT
$rpc_py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t $TEST_TRANSPORT -a $1 -s $NVMF_PORT
}
timing_enter target_disconnect
nvmftestinit
disconnect_init
disconnect_init $NVMF_FIRST_TARGET_IP
# If perf doesn't shut down, this test will time out.
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 20 -c 0xF -r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT" &
perfpid=$!
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 10 -c 0xF \
-r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT" &
reconnectpid=$!
sleep 5
sleep 2
kill -9 $nvmfpid
sleep 2
disconnect_init
disconnect_init $NVMF_FIRST_TARGET_IP
wait $perfpid
wait $reconnectpid
sync
if [ -n "$NVMF_SECOND_TARGET_IP" ]; then
timing_enter failover_test
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 10 -c 0xF \
-r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT alt_traddr:$NVMF_SECOND_TARGET_IP" &
reconnectpid=$!
sleep 2
kill -9 $nvmfpid
sleep 2
disconnect_init $NVMF_SECOND_TARGET_IP
wait $reconnectpid
sync
timing_exit failover_test
fi
trap - SIGINT SIGTERM EXIT
rm -f $PLUGIN_DIR/example_config_extended.fio || true
nvmftestfini