test/nvmf: add a test case for trid failover.
Modify the target_disconnect.sh test to include an example of transport ID (trid) failover for an NVMe-oF controller.

Change-Id: I746ed737ab56c7dec6ee99e840c631ba46ee359e
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/472230
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
a072421c90
commit
eb2dee2444
@ -45,6 +45,7 @@
|
||||
|
||||
struct ctrlr_entry {
|
||||
struct spdk_nvme_ctrlr *ctrlr;
|
||||
struct spdk_nvme_transport_id failover_trid;
|
||||
enum spdk_nvme_transport_type trtype;
|
||||
struct ctrlr_entry *next;
|
||||
char name[1024];
|
||||
@ -117,6 +118,7 @@ static const char *g_core_mask;
|
||||
|
||||
struct trid_entry {
|
||||
struct spdk_nvme_transport_id trid;
|
||||
struct spdk_nvme_transport_id failover_trid;
|
||||
TAILQ_ENTRY(trid_entry) tailq;
|
||||
};
|
||||
|
||||
@ -361,6 +363,7 @@ register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
|
||||
{
|
||||
struct spdk_nvme_ns *ns;
|
||||
struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry));
|
||||
const struct spdk_nvme_transport_id *ctrlr_trid;
|
||||
uint32_t nsid;
|
||||
|
||||
if (entry == NULL) {
|
||||
@ -368,6 +371,22 @@ register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ctrlr_trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
|
||||
assert(ctrlr_trid != NULL);
|
||||
|
||||
/* each controller needs a unique failover trid. */
|
||||
entry->failover_trid = trid_entry->failover_trid;
|
||||
|
||||
/*
|
||||
* Users are allowed to leave the trid subnqn blank or specify a discovery controller subnqn.
|
||||
* In those cases, the controller subnqn will not equal the trid_entry subnqn and, by association,
|
||||
* the failover_trid subnqn.
|
||||
* When we do failover, we want to reconnect to the same nqn so explicitly set the failover nqn to
|
||||
* the ctrlr nqn here.
|
||||
*/
|
||||
snprintf(entry->failover_trid.subnqn, SPDK_NVMF_NQN_MAX_LEN + 1, "%s", ctrlr_trid->subnqn);
|
||||
|
||||
|
||||
build_nvme_name(entry->name, sizeof(entry->name), ctrlr);
|
||||
|
||||
entry->ctrlr = ctrlr;
|
||||
@ -583,6 +602,7 @@ static void usage(char *program_name)
|
||||
printf("\t traddr Transport address (e.g. 192.168.100.8 for RDMA)\n");
|
||||
printf("\t trsvcid Transport service identifier (e.g. 4420)\n");
|
||||
printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
|
||||
printf("\t alt_traddr (Optional) Alternative Transport address for failover.\n");
|
||||
printf("\t Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n");
|
||||
printf("\t[-k keep alive timeout period in millisecond]\n");
|
||||
printf("\t[-s DPDK huge memory size in MB.]\n");
|
||||
@ -614,6 +634,8 @@ add_trid(const char *trid_str)
|
||||
{
|
||||
struct trid_entry *trid_entry;
|
||||
struct spdk_nvme_transport_id *trid;
|
||||
char *alt_traddr;
|
||||
int len;
|
||||
|
||||
trid_entry = calloc(1, sizeof(*trid_entry));
|
||||
if (trid_entry == NULL) {
|
||||
@ -629,6 +651,19 @@ add_trid(const char *trid_str)
|
||||
return 1;
|
||||
}
|
||||
|
||||
trid_entry->failover_trid = trid_entry->trid;
|
||||
|
||||
alt_traddr = strcasestr(trid_str, "alt_traddr:");
|
||||
if (alt_traddr) {
|
||||
alt_traddr += strlen("alt_traddr:");
|
||||
len = strcspn(alt_traddr, " \t\n");
|
||||
if (len > SPDK_NVMF_TRADDR_MAX_LEN) {
|
||||
fprintf(stderr, "The failover traddr %s is too long.\n", alt_traddr);
|
||||
return -1;
|
||||
}
|
||||
snprintf(trid_entry->failover_trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, "%s", alt_traddr);
|
||||
}
|
||||
|
||||
TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq);
|
||||
return 0;
|
||||
}
|
||||
@ -975,9 +1010,11 @@ associate_workers_with_ns(void)
|
||||
static void *
|
||||
nvme_poll_ctrlrs(void *arg)
|
||||
{
|
||||
struct ctrlr_entry *entry;
|
||||
int oldstate;
|
||||
int rc;
|
||||
struct ctrlr_entry *entry;
|
||||
const struct spdk_nvme_transport_id *old_trid;
|
||||
int oldstate;
|
||||
int rc;
|
||||
|
||||
|
||||
spdk_unaffinitize_thread();
|
||||
|
||||
@ -989,7 +1026,19 @@ nvme_poll_ctrlrs(void *arg)
|
||||
rc = spdk_nvme_ctrlr_process_admin_completions(entry->ctrlr);
|
||||
/* This controller has encountered a failure at the transport level. reset it. */
|
||||
if (rc == -ENXIO) {
|
||||
fprintf(stderr, "A controller has encountered a failure and is being reset.\n");
|
||||
if (entry->num_resets == 0) {
|
||||
old_trid = spdk_nvme_ctrlr_get_transport_id(entry->ctrlr);
|
||||
fprintf(stderr, "A controller has encountered a failure and is being reset.\n");
|
||||
if (spdk_nvme_transport_id_compare(old_trid, &entry->failover_trid)) {
|
||||
fprintf(stderr, "Resorting to new failover address %s\n", entry->failover_trid.traddr);
|
||||
spdk_nvme_ctrlr_fail(entry->ctrlr);
|
||||
rc = spdk_nvme_ctrlr_set_trid(entry->ctrlr, &entry->failover_trid);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "Unable to fail over to back up trid.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = spdk_nvme_ctrlr_reset(entry->ctrlr);
|
||||
if (rc != 0) {
|
||||
entry->num_resets++;
|
||||
|
@ -886,6 +886,12 @@ spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *st
|
||||
* it as an invalid key.
|
||||
*/
|
||||
continue;
|
||||
} else if (strcasecmp(key, "alt_traddr") == 0) {
|
||||
/*
|
||||
* Used by applications for enabling transport ID failover.
|
||||
* Please see the case above for more information on custom parameters.
|
||||
*/
|
||||
continue;
|
||||
} else {
|
||||
SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
|
||||
}
|
||||
|
@ -183,6 +183,7 @@ function nvmftestinit()
|
||||
if [ "$TEST_TRANSPORT" == "rdma" ]; then
|
||||
RDMA_IP_LIST=$(get_available_rdma_ips)
|
||||
NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
|
||||
NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
|
||||
if [ -z $NVMF_FIRST_TARGET_IP ]; then
|
||||
echo "no NIC for nvmf test"
|
||||
exit 0
|
||||
|
@ -22,28 +22,48 @@ function disconnect_init()
|
||||
$rpc_py nvmf_subsystem_create nqn.2016-06.io.spdk:cnode1 -a -s SPDK00000000000001
|
||||
|
||||
$rpc_py nvmf_subsystem_add_ns nqn.2016-06.io.spdk:cnode1 Malloc0
|
||||
$rpc_py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT
|
||||
$rpc_py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t $TEST_TRANSPORT -a $1 -s $NVMF_PORT
|
||||
}
|
||||
|
||||
timing_enter target_disconnect
|
||||
|
||||
nvmftestinit
|
||||
|
||||
disconnect_init
|
||||
disconnect_init $NVMF_FIRST_TARGET_IP
|
||||
|
||||
# If perf doesn't shut down, this test will time out.
|
||||
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 20 -c 0xF -r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT" &
|
||||
perfpid=$!
|
||||
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 10 -c 0xF \
|
||||
-r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT" &
|
||||
reconnectpid=$!
|
||||
|
||||
sleep 5
|
||||
sleep 2
|
||||
kill -9 $nvmfpid
|
||||
|
||||
sleep 2
|
||||
disconnect_init
|
||||
disconnect_init $NVMF_FIRST_TARGET_IP
|
||||
|
||||
wait $perfpid
|
||||
wait $reconnectpid
|
||||
sync
|
||||
|
||||
if [ -n "$NVMF_SECOND_TARGET_IP" ]; then
|
||||
timing_enter failover_test
|
||||
|
||||
$rootdir/examples/nvme/reconnect/reconnect -q 32 -o 4096 -w randrw -M 50 -t 10 -c 0xF \
|
||||
-r "trtype:$TEST_TRANSPORT adrfam:IPv4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT alt_traddr:$NVMF_SECOND_TARGET_IP" &
|
||||
reconnectpid=$!
|
||||
|
||||
sleep 2
|
||||
kill -9 $nvmfpid
|
||||
|
||||
sleep 2
|
||||
disconnect_init $NVMF_SECOND_TARGET_IP
|
||||
|
||||
wait $reconnectpid
|
||||
sync
|
||||
|
||||
timing_exit failover_test
|
||||
fi
|
||||
|
||||
trap - SIGINT SIGTERM EXIT
|
||||
rm -f $PLUGIN_DIR/example_config_extended.fio || true
|
||||
nvmftestfini
|
||||
|
Loading…
Reference in New Issue
Block a user