From fb12887fd8bba3abdb788e99d3b60d03dfe36415 Mon Sep 17 00:00:00 2001 From: Curt Bruns Date: Thu, 10 Mar 2022 03:56:58 -0800 Subject: [PATCH] test: add multi-process Async Event Report test Modified the existing nvme aer test to include a multi-process option that verifies that two processes will receive an async event notification. Also added the multi-process aer test to the CI test suite. Signed-off-by: Curt Bruns Change-Id: I08731fad317d43dcfb1766d22a3f4c6aa1738d2a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12293 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Paul Luse Reviewed-by: Shuhei Matsumoto Reviewed-by: Jim Harris --- test/nvme/aer/aer.c | 251 +++++++++++++++++++++++++++++++++++++++----- test/nvme/nvme.sh | 2 + 2 files changed, 229 insertions(+), 24 deletions(-) diff --git a/test/nvme/aer/aer.c b/test/nvme/aer/aer.c index ec6e185eb..d5f6d576e 100644 --- a/test/nvme/aer/aer.c +++ b/test/nvme/aer/aer.c @@ -40,6 +40,13 @@ static char *g_touch_file; static int g_enable_temp_test = 0; /* Expected changed NS ID */ static uint32_t g_expected_ns_test = 0; +/* For multi-process test */ +static int g_multi_process_test = 0; +static bool g_parent_process; +static const char *g_sem_init_name = "/init"; +static const char *g_sem_child_name = "/child"; +static sem_t *g_sem_init_id; +static sem_t *g_sem_child_id; static void set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) @@ -72,6 +79,9 @@ set_temp_threshold(struct dev *dev, uint32_t temp) rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev); if (rc == 0) { g_outstanding_commands++; + } else { + fprintf(stderr, "Submitting Admin cmd failed with rc: %d (%s)\n", \ + rc, (g_parent_process ? 
"Parent" : "Child")); } return rc; @@ -126,7 +136,7 @@ get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) { struct dev *dev = cb_arg; - g_outstanding_commands --; + g_outstanding_commands--; if (spdk_nvme_cpl_is_error(cpl)) { printf("%s: get log page failed\n", dev->name); @@ -180,26 +190,48 @@ cleanup(void) static void aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) { - uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16; - struct dev *dev = arg; + struct dev *dev = arg; + uint32_t log_page_id; + uint32_t aen_event_info; + uint32_t aen_event_type; + union spdk_nvme_async_event_completion aen_cpl; + + aen_cpl.raw = cpl->cdw0; + aen_event_info = aen_cpl.bits.async_event_info; + aen_event_type = aen_cpl.bits.async_event_type; + log_page_id = aen_cpl.bits.log_page_identifier; if (spdk_nvme_cpl_is_error(cpl)) { - printf("%s: AER failed\n", dev->name); + fprintf(stderr, "%s: AER failed\n", dev->name); g_failed = 1; return; } - printf("%s: aer_cb for log page %d\n", dev->name, log_page_id); - - if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) { - /* Set the temperature threshold back to the original value - * so the AER doesn't trigger again. - */ + printf("%s: aer_cb for log page %d, aen_event_type: 0x%02x, aen_event_info: 0x%02x (%s)\n", \ + dev->name, log_page_id, aen_event_type, aen_event_info, \ + (g_parent_process ? "Parent" : "Child")); + /* Temp Test: Verify proper EventType, Event Info and Log Page. + * NOTE: QEMU NVMe controllers return Spare Below Threshold Status event info + * instead of Temperature Threshold event info which is why it's used in the check + * below. 
+ */ + if ((log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) && \ + (aen_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_SMART) && \ + ((aen_event_info == SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD) || \ + (aen_event_info == SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD))) { + /* Set the temperature threshold back to the original value to stop triggering */ + printf("aer_cb - Resetting Temp Threshold for device: %s (%s)\n", \ + dev->name, (g_parent_process ? "Parent" : "Child")); set_temp_threshold(dev, dev->orig_temp_threshold); get_health_log_page(dev); } else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) { + printf("aer_cb - Changed Namespace (%s)\n", \ + (g_parent_process ? "Parent" : "Child")); get_ns_state_test(dev, g_expected_ns_test); g_aer_done++; + } else { + printf("aer_cb - Unknown Log Page (%s)\n", \ + (g_parent_process ? "Parent" : "Child")); } } @@ -225,7 +257,8 @@ usage(const char *program_name) spdk_log_usage(stdout, "-L"); - printf(" -v verbose (enable warnings)\n"); + printf(" -i shared memory group ID\n"); + printf(" -m Multi-Process AER Test (only with Temp Test)\n"); printf(" -H show this usage\n"); } @@ -238,7 +271,7 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts) spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE); snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); - while ((op = getopt(argc, argv, "n:gr:t:HL:T")) != -1) { + while ((op = getopt(argc, argv, "gi:mn:r:t:HL:T")) != -1) { switch (op) { case 'n': val = spdk_strtol(optarg, 10); @@ -277,6 +310,16 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts) case 'H': usage(argv[0]); exit(EXIT_SUCCESS); + case 'i': + env_opts->shm_id = spdk_strtol(optarg, 10); + if (env_opts->shm_id < 0) { + fprintf(stderr, "Invalid shared memory ID\n"); + return env_opts->shm_id; + } + break; + case 'm': + g_multi_process_test = 1; + break; default: usage(argv[0]); return 1; @@ -364,7 +407,8 @@ spdk_aer_temperature_test(void) { 
struct dev *dev; - printf("Getting temperature thresholds of all controllers...\n"); + printf("Getting orig temperature thresholds of all controllers (%s)\n", + (g_parent_process ? "Parent" : "Child")); foreach_dev(dev) { /* Get the original temperature threshold */ get_temp_threshold(dev); @@ -391,16 +435,38 @@ spdk_aer_temperature_test(void) return g_failed; } - printf("Waiting for all controllers to trigger AER...\n"); - foreach_dev(dev) { - /* Set the temperature threshold to a low value */ - set_temp_threshold(dev, 200); + /* Only single process needs to set and verify lower threshold */ + if (g_parent_process) { + /* Wait until child has init'd and ready for test to continue */ + if (g_multi_process_test) { + sem_wait(g_sem_child_id); + } + printf("Setting all controllers temperature threshold low to trigger AER\n"); + foreach_dev(dev) { + /* Set the temperature threshold to a low value */ + set_temp_threshold(dev, 200); + } + + printf("Waiting for all controllers temperature threshold to be set lower\n"); + while (!g_failed && (g_temperature_done < g_num_devs)) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } + g_temperature_done = 0; + + if (g_failed) { + return g_failed; + } } - if (g_failed) { - return g_failed; + printf("Waiting for all controllers to trigger AER and reset threshold (%s)\n", + (g_parent_process ? 
"Parent" : "Child")); + /* Let parent know init is done and it's okay to continue */ + if (!g_parent_process) { + sem_post(g_sem_child_id); } - + /* Waiting for AEN to occur here */ while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) { foreach_dev(dev) { spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); @@ -445,6 +511,50 @@ spdk_aer_changed_ns_test(void) return 0; } +static int +setup_multi_process(void) +{ + pid_t pid; + int rc = 0; + + /* If AEN test was killed, remove named semaphore to start again */ + rc = sem_unlink(g_sem_init_name); + if (rc < 0 && errno != ENOENT) { + fprintf(stderr, "Init semaphore removal failure: %s", spdk_strerror(errno)); + return rc; + } + rc = sem_unlink(g_sem_child_name); + if (rc < 0 && errno != ENOENT) { + fprintf(stderr, "Child semaphore removal failure: %s", spdk_strerror(errno)); + return rc; + } + pid = fork(); + if (pid == -1) { + perror("Failed to fork\n"); + return -1; + } else if (pid == 0) { + printf("Child process pid: %d\n", getpid()); + g_parent_process = false; + g_sem_init_id = sem_open(g_sem_init_name, O_CREAT, 0600, 0); + g_sem_child_id = sem_open(g_sem_child_name, O_CREAT, 0600, 0); + if ((g_sem_init_id == SEM_FAILED) || (g_sem_child_id == SEM_FAILED)) { + fprintf(stderr, "Sem Open failed for child: %s\n", spdk_strerror(errno)); + return -1; + } + } + /* Parent process */ + else { + g_parent_process = true; + g_sem_init_id = sem_open(g_sem_init_name, O_CREAT, 0600, 0); + g_sem_child_id = sem_open(g_sem_child_name, O_CREAT, 0600, 0); + if ((g_sem_init_id == SEM_FAILED) || (g_sem_child_id == SEM_FAILED)) { + fprintf(stderr, "Sem Open failed for parent: %s\n", spdk_strerror(errno)); + return -1; + } + } + return 0; +} + int main(int argc, char **argv) { struct dev *dev; @@ -452,30 +562,83 @@ int main(int argc, char **argv) int rc; struct spdk_nvme_detach_ctx *detach_ctx = NULL; + spdk_env_opts_init(&opts); rc = parse_args(argc, argv, &opts); if (rc != 0) { return rc; } - 
spdk_env_opts_init(&opts); + if (g_multi_process_test) { + /* Multi-Process test only available with Temp Test */ + if (!g_enable_temp_test) { + fprintf(stderr, "Multi Process test only available with Temp Test (-T)\n"); + return 1; + } + if (opts.shm_id < 0) { + fprintf(stderr, "Multi Process requires shared memory id (-i )\n"); + return 1; + } + rc = setup_multi_process(); + if (rc != 0) { + fprintf(stderr, "Multi Process test failed to setup\n"); + return rc; + } + } else { + /* Only one process in test, set it to the parent process */ + g_parent_process = true; + } opts.name = "aer"; - opts.core_mask = "0x1"; + if (g_parent_process) { + opts.core_mask = "0x1"; + } else { + opts.core_mask = "0x2"; + } + + /* + * For multi-process test, parent (primary) and child (secondary) processes + * will execute all following code but DPDK setup is serialized + */ + if (!g_parent_process) { + if (sem_wait(g_sem_init_id) < 0) { + fprintf(stderr, "sem_wait failed for child process\n"); + return (-1); + } + } if (spdk_env_init(&opts) < 0) { fprintf(stderr, "Unable to initialize SPDK env\n"); return 1; } - printf("Asynchronous Event Request test\n"); + printf("Asynchronous Event Request test (%s)\n", + (g_parent_process ? 
"Parent" : "Child")); if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { fprintf(stderr, "spdk_nvme_probe() failed\n"); return 1; } + if (g_num_devs == 0) { + fprintf(stderr, "No controllers found - exiting\n"); + g_failed = 1; + } if (g_failed) { goto done; } + if (g_parent_process && g_enable_temp_test) { + printf("Reset controller to setup AER completions for this process\n"); + foreach_dev(dev) { + if (spdk_nvme_ctrlr_reset(dev->ctrlr) < 0) { + fprintf(stderr, "nvme reset failed.\n"); + return -1; + } + } + } + if (g_parent_process && g_multi_process_test) { + /* Primary can release child/secondary for init now */ + sem_post(g_sem_init_id); + } + printf("Registering asynchronous event callbacks...\n"); foreach_dev(dev) { spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev); @@ -507,7 +670,7 @@ int main(int argc, char **argv) } } - printf("Cleaning up...\n"); + printf("Cleaning up...(%s)\n", (g_parent_process ? "Parent" : "Child")); while (g_outstanding_commands) { foreach_dev(dev) { @@ -515,6 +678,11 @@ int main(int argc, char **argv) } } + /* Only one process cleans up at a time - let child go first */ + if (g_multi_process_test && g_parent_process) { + /* Parent waits for child to clean up before executing clean up process */ + sem_wait(g_sem_child_id); + } /* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. 
*/ foreach_dev(dev) { spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL); @@ -528,8 +696,43 @@ int main(int argc, char **argv) spdk_nvme_detach_poll(detach_ctx); } + /* Release semaphore to allow parent to cleanup */ + if (!g_parent_process) { + sem_post(g_sem_child_id); + sem_wait(g_sem_init_id); + } done: cleanup(); + /* Wait for child process to finish and verify it finished correctly before detaching resources */ + if (g_multi_process_test && g_parent_process) { + int status; + sem_post(g_sem_init_id); + wait(&status); + if (WIFEXITED(status)) { + /* Child ended normally */ + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child Failed with status: %d.\n", (int8_t)(WEXITSTATUS(status))); + g_failed = true; + } + } + if (sem_close(g_sem_init_id) != 0) { + perror("sem_close Failed for init\n"); + g_failed = true; + } + if (sem_close(g_sem_child_id) != 0) { + perror("sem_close Failed for child\n"); + g_failed = true; + } + + if (sem_unlink(g_sem_init_name) != 0) { + perror("sem_unlink Failed for init\n"); + g_failed = true; + } + if (sem_unlink(g_sem_child_name) != 0) { + perror("sem_unlink Failed for child\n"); + g_failed = true; + } + } return g_failed; } diff --git a/test/nvme/nvme.sh b/test/nvme/nvme.sh index 7f705e833..c729e9098 100755 --- a/test/nvme/nvme.sh +++ b/test/nvme/nvme.sh @@ -134,8 +134,10 @@ run_test "nvme_reserve" $testdir/reserve/reserve run_test "nvme_err_injection" $testdir/err_injection/err_injection run_test "nvme_overhead" $testdir/overhead/overhead -s 4096 -t 1 -H -i 0 run_test "nvme_arbitration" $SPDK_EXAMPLE_DIR/arbitration -t 3 -i 0 +run_test "nvme_single_aen" $testdir/aer/aer -T -i 0 -L log if [ $(uname) != "FreeBSD" ]; then + run_test "nvme_multi_aen" $testdir/aer/aer -m -T -i 0 -L log run_test "nvme_startup" $testdir/startup/startup -t 1000000 run_test "nvme_multi_secondary" nvme_multi_secondary trap - SIGINT SIGTERM EXIT