test: add multi-process Async Event Report test
Modified the existing nvme aer test to include a multi-process option that verifies that two processes will receive an async event notification. Also added the multi-process aer test to the CI test suite. Signed-off-by: Curt Bruns <curt.e.bruns@gmail.com> Change-Id: I08731fad317d43dcfb1766d22a3f4c6aa1738d2a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12293 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Paul Luse <paul.e.luse@intel.com> Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
0b9100e8a5
commit
fb12887fd8
@ -40,6 +40,13 @@ static char *g_touch_file;
|
||||
static int g_enable_temp_test = 0;
|
||||
/* Expected changed NS ID */
|
||||
static uint32_t g_expected_ns_test = 0;
|
||||
/* For multi-process test */
|
||||
static int g_multi_process_test = 0;
|
||||
static bool g_parent_process;
|
||||
static const char *g_sem_init_name = "/init";
|
||||
static const char *g_sem_child_name = "/child";
|
||||
static sem_t *g_sem_init_id;
|
||||
static sem_t *g_sem_child_id;
|
||||
|
||||
static void
|
||||
set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
|
||||
@ -72,6 +79,9 @@ set_temp_threshold(struct dev *dev, uint32_t temp)
|
||||
rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev);
|
||||
if (rc == 0) {
|
||||
g_outstanding_commands++;
|
||||
} else {
|
||||
fprintf(stderr, "Submitting Admin cmd failed with rc: %d (%s)\n", \
|
||||
rc, (g_parent_process ? "Parent" : "Child"));
|
||||
}
|
||||
|
||||
return rc;
|
||||
@ -126,7 +136,7 @@ get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
|
||||
{
|
||||
struct dev *dev = cb_arg;
|
||||
|
||||
g_outstanding_commands --;
|
||||
g_outstanding_commands--;
|
||||
|
||||
if (spdk_nvme_cpl_is_error(cpl)) {
|
||||
printf("%s: get log page failed\n", dev->name);
|
||||
@ -180,26 +190,48 @@ cleanup(void)
|
||||
static void
|
||||
aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
|
||||
{
|
||||
uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
|
||||
struct dev *dev = arg;
|
||||
struct dev *dev = arg;
|
||||
uint32_t log_page_id;
|
||||
uint32_t aen_event_info;
|
||||
uint32_t aen_event_type;
|
||||
union spdk_nvme_async_event_completion aen_cpl;
|
||||
|
||||
aen_cpl.raw = cpl->cdw0;
|
||||
aen_event_info = aen_cpl.bits.async_event_info;
|
||||
aen_event_type = aen_cpl.bits.async_event_type;
|
||||
log_page_id = aen_cpl.bits.log_page_identifier;
|
||||
|
||||
if (spdk_nvme_cpl_is_error(cpl)) {
|
||||
printf("%s: AER failed\n", dev->name);
|
||||
fprintf(stderr, "%s: AER failed\n", dev->name);
|
||||
g_failed = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);
|
||||
|
||||
if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) {
|
||||
/* Set the temperature threshold back to the original value
|
||||
* so the AER doesn't trigger again.
|
||||
*/
|
||||
printf("%s: aer_cb for log page %d, aen_event_type: 0x%02x, aen_event_info: 0x%02x (%s)\n", \
|
||||
dev->name, log_page_id, aen_event_type, aen_event_info, \
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
/* Temp Test: Verify proper EventType, Event Info and Log Page.
|
||||
* NOTE: QEMU NVMe controllers return Spare Below Threshold Status event info
|
||||
* instead of Temperature Threshold event info, which is why it's used in the check
|
||||
* below.
|
||||
*/
|
||||
if ((log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) && \
|
||||
(aen_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_SMART) && \
|
||||
((aen_event_info == SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD) || \
|
||||
(aen_event_info == SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD))) {
|
||||
/* Set the temperature threshold back to the original value to stop triggering */
|
||||
printf("aer_cb - Resetting Temp Threshold for device: %s (%s)\n", \
|
||||
dev->name, (g_parent_process ? "Parent" : "Child"));
|
||||
set_temp_threshold(dev, dev->orig_temp_threshold);
|
||||
get_health_log_page(dev);
|
||||
} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
|
||||
printf("aer_cb - Changed Namespace (%s)\n", \
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
get_ns_state_test(dev, g_expected_ns_test);
|
||||
g_aer_done++;
|
||||
} else {
|
||||
printf("aer_cb - Unknown Log Page (%s)\n", \
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -225,7 +257,8 @@ usage(const char *program_name)
|
||||
|
||||
spdk_log_usage(stdout, "-L");
|
||||
|
||||
printf(" -v verbose (enable warnings)\n");
|
||||
printf(" -i <id> shared memory group ID\n");
|
||||
printf(" -m Multi-Process AER Test (only with Temp Test)\n");
|
||||
printf(" -H show this usage\n");
|
||||
}
|
||||
|
||||
@ -238,7 +271,7 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts)
|
||||
spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
|
||||
snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
|
||||
|
||||
while ((op = getopt(argc, argv, "n:gr:t:HL:T")) != -1) {
|
||||
while ((op = getopt(argc, argv, "gi:mn:r:t:HL:T")) != -1) {
|
||||
switch (op) {
|
||||
case 'n':
|
||||
val = spdk_strtol(optarg, 10);
|
||||
@ -277,6 +310,16 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts)
|
||||
case 'H':
|
||||
usage(argv[0]);
|
||||
exit(EXIT_SUCCESS);
|
||||
case 'i':
|
||||
env_opts->shm_id = spdk_strtol(optarg, 10);
|
||||
if (env_opts->shm_id < 0) {
|
||||
fprintf(stderr, "Invalid shared memory ID\n");
|
||||
return env_opts->shm_id;
|
||||
}
|
||||
break;
|
||||
case 'm':
|
||||
g_multi_process_test = 1;
|
||||
break;
|
||||
default:
|
||||
usage(argv[0]);
|
||||
return 1;
|
||||
@ -364,7 +407,8 @@ spdk_aer_temperature_test(void)
|
||||
{
|
||||
struct dev *dev;
|
||||
|
||||
printf("Getting temperature thresholds of all controllers...\n");
|
||||
printf("Getting orig temperature thresholds of all controllers (%s)\n",
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
foreach_dev(dev) {
|
||||
/* Get the original temperature threshold */
|
||||
get_temp_threshold(dev);
|
||||
@ -391,16 +435,38 @@ spdk_aer_temperature_test(void)
|
||||
return g_failed;
|
||||
}
|
||||
|
||||
printf("Waiting for all controllers to trigger AER...\n");
|
||||
foreach_dev(dev) {
|
||||
/* Set the temperature threshold to a low value */
|
||||
set_temp_threshold(dev, 200);
|
||||
/* Only single process needs to set and verify lower threshold */
|
||||
if (g_parent_process) {
|
||||
/* Wait until the child has initialized and is ready for the test to continue */
|
||||
if (g_multi_process_test) {
|
||||
sem_wait(g_sem_child_id);
|
||||
}
|
||||
printf("Setting all controllers temperature threshold low to trigger AER\n");
|
||||
foreach_dev(dev) {
|
||||
/* Set the temperature threshold to a low value */
|
||||
set_temp_threshold(dev, 200);
|
||||
}
|
||||
|
||||
printf("Waiting for all controllers temperature threshold to be set lower\n");
|
||||
while (!g_failed && (g_temperature_done < g_num_devs)) {
|
||||
foreach_dev(dev) {
|
||||
spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
|
||||
}
|
||||
}
|
||||
g_temperature_done = 0;
|
||||
|
||||
if (g_failed) {
|
||||
return g_failed;
|
||||
}
|
||||
}
|
||||
|
||||
if (g_failed) {
|
||||
return g_failed;
|
||||
printf("Waiting for all controllers to trigger AER and reset threshold (%s)\n",
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
/* Let parent know init is done and it's okay to continue */
|
||||
if (!g_parent_process) {
|
||||
sem_post(g_sem_child_id);
|
||||
}
|
||||
|
||||
/* Wait for the AEN to occur here */
|
||||
while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
|
||||
foreach_dev(dev) {
|
||||
spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
|
||||
@ -445,6 +511,50 @@ spdk_aer_changed_ns_test(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
setup_multi_process(void)
|
||||
{
|
||||
pid_t pid;
|
||||
int rc = 0;
|
||||
|
||||
/* If AEN test was killed, remove named semaphore to start again */
|
||||
rc = sem_unlink(g_sem_init_name);
|
||||
if (rc < 0 && errno != ENOENT) {
|
||||
fprintf(stderr, "Init semaphore removal failure: %s", spdk_strerror(errno));
|
||||
return rc;
|
||||
}
|
||||
rc = sem_unlink(g_sem_child_name);
|
||||
if (rc < 0 && errno != ENOENT) {
|
||||
fprintf(stderr, "Child semaphore removal failure: %s", spdk_strerror(errno));
|
||||
return rc;
|
||||
}
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
perror("Failed to fork\n");
|
||||
return -1;
|
||||
} else if (pid == 0) {
|
||||
printf("Child process pid: %d\n", getpid());
|
||||
g_parent_process = false;
|
||||
g_sem_init_id = sem_open(g_sem_init_name, O_CREAT, 0600, 0);
|
||||
g_sem_child_id = sem_open(g_sem_child_name, O_CREAT, 0600, 0);
|
||||
if ((g_sem_init_id == SEM_FAILED) || (g_sem_child_id == SEM_FAILED)) {
|
||||
fprintf(stderr, "Sem Open failed for child: %s\n", spdk_strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
/* Parent process */
|
||||
else {
|
||||
g_parent_process = true;
|
||||
g_sem_init_id = sem_open(g_sem_init_name, O_CREAT, 0600, 0);
|
||||
g_sem_child_id = sem_open(g_sem_child_name, O_CREAT, 0600, 0);
|
||||
if ((g_sem_init_id == SEM_FAILED) || (g_sem_child_id == SEM_FAILED)) {
|
||||
fprintf(stderr, "Sem Open failed for parent: %s\n", spdk_strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct dev *dev;
|
||||
@ -452,30 +562,83 @@ int main(int argc, char **argv)
|
||||
int rc;
|
||||
struct spdk_nvme_detach_ctx *detach_ctx = NULL;
|
||||
|
||||
spdk_env_opts_init(&opts);
|
||||
rc = parse_args(argc, argv, &opts);
|
||||
if (rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
spdk_env_opts_init(&opts);
|
||||
if (g_multi_process_test) {
|
||||
/* Multi-Process test only available with Temp Test */
|
||||
if (!g_enable_temp_test) {
|
||||
fprintf(stderr, "Multi Process test only available with Temp Test (-T)\n");
|
||||
return 1;
|
||||
}
|
||||
if (opts.shm_id < 0) {
|
||||
fprintf(stderr, "Multi Process requires shared memory id (-i <id>)\n");
|
||||
return 1;
|
||||
}
|
||||
rc = setup_multi_process();
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "Multi Process test failed to setup\n");
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
/* Only one process in test, set it to the parent process */
|
||||
g_parent_process = true;
|
||||
}
|
||||
opts.name = "aer";
|
||||
opts.core_mask = "0x1";
|
||||
if (g_parent_process) {
|
||||
opts.core_mask = "0x1";
|
||||
} else {
|
||||
opts.core_mask = "0x2";
|
||||
}
|
||||
|
||||
/*
|
||||
* For multi-process test, parent (primary) and child (secondary) processes
|
||||
* will execute all following code but DPDK setup is serialized
|
||||
*/
|
||||
if (!g_parent_process) {
|
||||
if (sem_wait(g_sem_init_id) < 0) {
|
||||
fprintf(stderr, "sem_wait failed for child process\n");
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
if (spdk_env_init(&opts) < 0) {
|
||||
fprintf(stderr, "Unable to initialize SPDK env\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Asynchronous Event Request test\n");
|
||||
printf("Asynchronous Event Request test (%s)\n",
|
||||
(g_parent_process ? "Parent" : "Child"));
|
||||
|
||||
if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
|
||||
fprintf(stderr, "spdk_nvme_probe() failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (g_num_devs == 0) {
|
||||
fprintf(stderr, "No controllers found - exiting\n");
|
||||
g_failed = 1;
|
||||
}
|
||||
if (g_failed) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (g_parent_process && g_enable_temp_test) {
|
||||
printf("Reset controller to setup AER completions for this process\n");
|
||||
foreach_dev(dev) {
|
||||
if (spdk_nvme_ctrlr_reset(dev->ctrlr) < 0) {
|
||||
fprintf(stderr, "nvme reset failed.\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (g_parent_process && g_multi_process_test) {
|
||||
/* Primary can release child/secondary for init now */
|
||||
sem_post(g_sem_init_id);
|
||||
}
|
||||
|
||||
printf("Registering asynchronous event callbacks...\n");
|
||||
foreach_dev(dev) {
|
||||
spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
|
||||
@ -507,7 +670,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
printf("Cleaning up...\n");
|
||||
printf("Cleaning up...(%s)\n", (g_parent_process ? "Parent" : "Child"));
|
||||
|
||||
while (g_outstanding_commands) {
|
||||
foreach_dev(dev) {
|
||||
@ -515,6 +678,11 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
/* Only one process cleans up at a time - let child go first */
|
||||
if (g_multi_process_test && g_parent_process) {
|
||||
/* Parent waits for child to clean up before executing clean up process */
|
||||
sem_wait(g_sem_child_id);
|
||||
}
|
||||
/* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. */
|
||||
foreach_dev(dev) {
|
||||
spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL);
|
||||
@ -528,8 +696,43 @@ int main(int argc, char **argv)
|
||||
spdk_nvme_detach_poll(detach_ctx);
|
||||
}
|
||||
|
||||
/* Release semaphore to allow parent to cleanup */
|
||||
if (!g_parent_process) {
|
||||
sem_post(g_sem_child_id);
|
||||
sem_wait(g_sem_init_id);
|
||||
}
|
||||
done:
|
||||
cleanup();
|
||||
|
||||
/* Wait for child process to finish and verify it finished correctly before detaching resources */
|
||||
if (g_multi_process_test && g_parent_process) {
|
||||
int status;
|
||||
sem_post(g_sem_init_id);
|
||||
wait(&status);
|
||||
if (WIFEXITED(status)) {
|
||||
/* Child ended normally */
|
||||
if (WEXITSTATUS(status) != 0) {
|
||||
fprintf(stderr, "Child Failed with status: %d.\n", (int8_t)(WEXITSTATUS(status)));
|
||||
g_failed = true;
|
||||
}
|
||||
}
|
||||
if (sem_close(g_sem_init_id) != 0) {
|
||||
perror("sem_close Failed for init\n");
|
||||
g_failed = true;
|
||||
}
|
||||
if (sem_close(g_sem_child_id) != 0) {
|
||||
perror("sem_close Failed for child\n");
|
||||
g_failed = true;
|
||||
}
|
||||
|
||||
if (sem_unlink(g_sem_init_name) != 0) {
|
||||
perror("sem_unlink Failed for init\n");
|
||||
g_failed = true;
|
||||
}
|
||||
if (sem_unlink(g_sem_child_name) != 0) {
|
||||
perror("sem_unlink Failed for child\n");
|
||||
g_failed = true;
|
||||
}
|
||||
}
|
||||
return g_failed;
|
||||
}
|
||||
|
@ -134,8 +134,10 @@ run_test "nvme_reserve" $testdir/reserve/reserve
|
||||
run_test "nvme_err_injection" $testdir/err_injection/err_injection
|
||||
run_test "nvme_overhead" $testdir/overhead/overhead -s 4096 -t 1 -H -i 0
|
||||
run_test "nvme_arbitration" $SPDK_EXAMPLE_DIR/arbitration -t 3 -i 0
|
||||
run_test "nvme_single_aen" $testdir/aer/aer -T -i 0 -L log
|
||||
|
||||
if [ $(uname) != "FreeBSD" ]; then
|
||||
run_test "nvme_multi_aen" $testdir/aer/aer -m -T -i 0 -L log
|
||||
run_test "nvme_startup" $testdir/startup/startup -t 1000000
|
||||
run_test "nvme_multi_secondary" nvme_multi_secondary
|
||||
trap - SIGINT SIGTERM EXIT
|
||||
|
Loading…
Reference in New Issue
Block a user