test: update multi-process Asynchronous Event Request (AER) test

Updated the existing nvme aer test to remove a race condition:
aer_cb could execute and increment the g_temperature_done variable
just before the test reset it back to 0, which caused the test to
loop indefinitely. Also changed the failure check on the
spdk_nvme_ctrlr_reset call from "< 0" to "!= 0", so that any
non-zero return value is treated as a failure.

Fixes #2559

Signed-off-by: Curt Bruns <curt.e.bruns@gmail.com>
Change-Id: Ib0679917684cdbd4c8038279426c6ec368be5f11
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13467
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Curt Bruns 2022-06-24 09:07:58 -07:00 committed by Tomasz Zawadzki
parent 2e283fcb67
commit 2a559a43af
2 changed files with 9 additions and 9 deletions

View File

@ -223,8 +223,10 @@ aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
((aen_event_info == SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD) || \
(aen_event_info == SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD))) {
/* Set the temperature threshold back to the original value to stop triggering */
AER_PRINTF("aer_cb - Resetting Temp Threshold for device: %s\n", dev->name);
set_temp_threshold(dev, dev->orig_temp_threshold);
if (g_parent_process) {
AER_PRINTF("aer_cb - Resetting Temp Threshold for device: %s\n", dev->name);
set_temp_threshold(dev, dev->orig_temp_threshold);
}
get_health_log_page(dev);
} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
AER_PRINTF("aer_cb - Changed Namespace\n");
@ -452,7 +454,6 @@ spdk_aer_temperature_test(void)
spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
}
}
g_temperature_done = 0;
if (g_failed) {
return g_failed;
@ -464,8 +465,8 @@ spdk_aer_temperature_test(void)
if (!g_parent_process) {
sem_post(g_sem_child_id);
}
/* Waiting for AEN to be occur here */
while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
/* Waiting for AEN to be occur here. Each device will increment g_aer_done on an AEN */
while (!g_failed && (g_aer_done < g_num_devs)) {
foreach_dev(dev) {
spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
}
@ -628,7 +629,7 @@ main(int argc, char **argv)
if (g_parent_process && g_enable_temp_test) {
AER_PRINTF("Reset controller to setup AER completions for this process\n");
foreach_dev(dev) {
if (spdk_nvme_ctrlr_reset(dev->ctrlr) < 0) {
if (spdk_nvme_ctrlr_reset(dev->ctrlr) != 0) {
AER_FPRINTF(stderr, "nvme reset failed.\n");
return -1;
}

View File

@ -133,11 +133,10 @@ run_test "nvme_reserve" $testdir/reserve/reserve
run_test "nvme_err_injection" $testdir/err_injection/err_injection
run_test "nvme_overhead" $testdir/overhead/overhead -s 4096 -t 1 -H -i 0
run_test "nvme_arbitration" $SPDK_EXAMPLE_DIR/arbitration -t 3 -i 0
#run_test "nvme_single_aen" $testdir/aer/aer -T -i 0 -L log
#NOTE: Disabling aen tests for now due to issue #2559
run_test "nvme_single_aen" $testdir/aer/aer -T -i 0 -L log
if [ $(uname) != "FreeBSD" ]; then
#run_test "nvme_multi_aen" $testdir/aer/aer -m -T -i 0 -L log
run_test "nvme_multi_aen" $testdir/aer/aer -m -T -i 0 -L log
run_test "nvme_startup" $testdir/startup/startup -t 1000000
run_test "nvme_multi_secondary" nvme_multi_secondary
trap - SIGINT SIGTERM EXIT