diff --git a/CHANGELOG.md b/CHANGELOG.md index 8618d8815..21dd549ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,13 @@ New `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy co A new API `spdk_bdev_io_get_submit_tsc` was added to get the submit_tsc of the bdev I/O. +### event + +Added core lock file mechanism to prevent the same CPU cores from being used by multiple +SPDK application instances. + +Added a command line switch to disable CPU locks on SPDK startup. + ## v22.09 ### accel diff --git a/lib/event/app.c b/lib/event/app.c index a9f377093..2aebcd0f4 100644 --- a/lib/event/app.c +++ b/lib/event/app.c @@ -31,6 +31,8 @@ #define SPDK_APP_DPDK_DEFAULT_BASE_VIRTADDR 0x200000000000 #define SPDK_APP_DEFAULT_CORE_LIMIT 0x140000000 /* 5 GiB */ +#define MAX_CPU_CORES 128 + struct spdk_app { const char *json_config_file; bool json_config_ignore_errors; @@ -49,6 +51,9 @@ static bool g_delay_subsystem_init = false; static bool g_shutdown_sig_received = false; static char *g_executable_name; static struct spdk_app_opts g_default_opts; +static bool g_disable_cpumask_locks = false; + +static int g_core_locks[MAX_CPU_CORES]; int spdk_app_get_shm_id(void) @@ -116,6 +121,8 @@ static const struct option g_cmdline_options[] = { {"base-virtaddr", required_argument, NULL, BASE_VIRTADDR_OPT_IDX}, #define ENV_CONTEXT_OPT_IDX 266 {"env-context", required_argument, NULL, ENV_CONTEXT_OPT_IDX}, +#define DISABLE_CPUMASK_LOCKS_OPT_IDX 267 + {"disable-cpumask-locks", no_argument, NULL, DISABLE_CPUMASK_LOCKS_OPT_IDX}, }; static void @@ -508,6 +515,86 @@ app_copy_opts(struct spdk_app_opts *opts, struct spdk_app_opts *opts_user, size_ #undef SET_FIELD } +static void +unclaim_cpu_cores(void) +{ + char core_name[40]; + uint32_t i; + + for (i = 0; i < MAX_CPU_CORES; i++) { + if (g_core_locks[i] != -1) { + snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", i); + close(g_core_locks[i]); + unlink(core_name); + } + } +} + +static int +claim_cpu_cores(uint32_t *failed_core) +{ + char core_name[40]; + int core_fd, pid; + int *core_map; + uint32_t core; + + struct flock core_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + + SPDK_ENV_FOREACH_CORE(core) { + snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", core); + core_fd = open(core_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (core_fd == -1) { + SPDK_ERRLOG("Could not open %s (%s).\n", core_name, spdk_strerror(errno)); + /* Return number of core we failed to claim. */ + goto error; + } + + if (ftruncate(core_fd, sizeof(int)) != 0) { + SPDK_ERRLOG("Could not truncate %s (%s).\n", core_name, spdk_strerror(errno)); + close(core_fd); + goto error; + } + + core_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED, core_fd, 0); + if (core_map == MAP_FAILED) { + SPDK_ERRLOG("Could not mmap core %s (%s).\n", core_name, spdk_strerror(errno)); + close(core_fd); + goto error; + } + + if (fcntl(core_fd, F_SETLK, &core_lock) != 0) { + pid = *core_map; + SPDK_ERRLOG("Cannot create lock on core %" PRIu32 ", probably process %d has claimed it.\n", + core, pid); + munmap(core_map, sizeof(int)); + close(core_fd); + goto error; + } + + /* We write the PID to the core lock file so that other processes trying + * to claim the same core will know what process is holding the lock. */ + *core_map = (int)getpid(); + munmap(core_map, sizeof(int)); + g_core_locks[core] = core_fd; + /* Keep core_fd open to maintain the lock. */ + } + + return 0; + +error: + if (failed_core != NULL) { + /* Set number of core we failed to claim. */ + *failed_core = core; + } + unclaim_cpu_cores(); + return -1; +} + int spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, void *arg1) @@ -518,6 +605,7 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, static bool g_env_was_setup = false; struct spdk_app_opts opts_local = {}; struct spdk_app_opts *opts = &opts_local; + uint32_t i; if (!opts_user) { SPDK_ERRLOG("opts_user should not be NULL\n"); @@ -583,6 +671,21 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, } spdk_log_open(opts->log); + + /* Initialize each lock to -1 to indicate "empty" status */ + for (i = 0; i < MAX_CPU_CORES; i++) { + g_core_locks[i] = -1; + } + + if (!g_disable_cpumask_locks) { + if (claim_cpu_cores(NULL)) { + SPDK_ERRLOG("Unable to acquire lock on assigned core mask - exiting.\n"); + return 1; + } + } else { + SPDK_NOTICELOG("CPU core locks deactivated.\n"); + } + SPDK_NOTICELOG("Total cores available: %d\n", spdk_env_get_core_count()); if ((rc = spdk_reactors_init(opts->msg_mempool_size)) != 0) { @@ -639,6 +742,7 @@ spdk_app_fini(void) spdk_reactors_fini(); spdk_env_fini(); spdk_log_close(); + unclaim_cpu_cores(); } static void @@ -718,6 +822,7 @@ usage(void (*app_usage)(void)) { printf("%dMB)\n", g_default_opts.mem_size >= 0 ? g_default_opts.mem_size : 0); } + printf(" --disable-cpumask-locks Disable CPU core lock files.\n"); printf(" --silence-noticelog disable notice level logging to stderr\n"); printf(" -u, --no-pci disable PCI access\n"); printf(" --wait-for-rpc wait for RPCs to initialize subsystems\n"); @@ -843,6 +948,9 @@ spdk_app_parse_args(int argc, char **argv, struct spdk_app_opts *opts, case CPUMASK_OPT_IDX: opts->reactor_mask = optarg; break; + case DISABLE_CPUMASK_LOCKS_OPT_IDX: + g_disable_cpumask_locks = true; + break; case MEM_CHANNELS_OPT_IDX: opts->mem_channel = spdk_strtol(optarg, 0); if (opts->mem_channel < 0) { diff --git a/scripts/setup.sh b/scripts/setup.sh index 8b0fde7ec..a1b20a048 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -422,6 +422,7 @@ function cleanup_linux() { files_to_clean+=("$dir/"*) done file_locks+=(/var/tmp/spdk_pci_lock*) + file_locks+=(/var/tmp/spdk_cpu_lock*) files_to_clean+=(/dev/shm/@(@($match_spdk)_trace|spdk_iscsi_conns)*) files_to_clean+=("${file_locks[@]}") diff --git a/test/iscsi_tgt/sock/sock.sh b/test/iscsi_tgt/sock/sock.sh index 6d28a1170..8e08bcaec 100755 --- a/test/iscsi_tgt/sock/sock.sh +++ b/test/iscsi_tgt/sock/sock.sh @@ -119,39 +119,39 @@ timing_enter sock_ssl_server echo "Testing SSL server path" # start echo server using hello_sock echo server -$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK & +$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK -m 0x1 & server_pid=$! trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT waitforlisten $server_pid # send message using hello_sock client message="**MESSAGE:This is a test message from the hello_sock client with ssl**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using TLS 1.3 message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.3**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13 -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using TLS 1.2 message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.2**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12 -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using incorrect TLS 7 message="**MESSAGE:This is a test message from the hello_sock client with ssl using incorrect TLS 7**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 -m 0x2 && exit 1 # send message using hello_sock client with KTLS disabled message="**MESSAGE:This is a test message from the hello_sock client with KTLS disabled**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi @@ -187,11 +187,11 @@ fi # send message using hello_sock client with unmatching PSK KEY, expect a failure message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 -m 0x2 && exit 1 # send message using hello_sock client with unmatching PSK IDENTITY, expect a failure message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID -m 0x2 && exit 1 trap '-' SIGINT SIGTERM EXIT # NOTE: socat returns code 143 on SIGINT @@ -206,7 +206,7 @@ timing_exit sock_ssl_server timing_enter sock_server # start echo server using hello_sock echo server -$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" & +$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" -m 0x1 & server_pid=$! trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT waitforlisten $server_pid diff --git a/test/nvmf/host/bdevperf.sh b/test/nvmf/host/bdevperf.sh index a166eac24..2e35effef 100755 --- a/test/nvmf/host/bdevperf.sh +++ b/test/nvmf/host/bdevperf.sh @@ -9,7 +9,7 @@ MALLOC_BDEV_SIZE=64 MALLOC_BLOCK_SIZE=512 function tgt_init() { - nvmfappstart -m 0xF + nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0 diff --git a/test/nvmf/host/failover.sh b/test/nvmf/host/failover.sh index eebfd4c2d..d8ebe4f4c 100755 --- a/test/nvmf/host/failover.sh +++ b/test/nvmf/host/failover.sh @@ -14,7 +14,7 @@ bdevperf_rpc_sock=/var/tmp/bdevperf.sock nvmftestinit -nvmfappstart -m 0xF +nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0 diff --git a/test/nvmf/host/multicontroller.sh b/test/nvmf/host/multicontroller.sh index 8c1ff2b5e..d3219b337 100755 --- a/test/nvmf/host/multicontroller.sh +++ b/test/nvmf/host/multicontroller.sh @@ -19,7 +19,7 @@ fi nvmftestinit -nvmfappstart -m 0xF +nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 diff --git a/test/nvmf/target/nvmf_vhost.sh b/test/nvmf/target/nvmf_vhost.sh index 89b115927..18275eab3 100755 --- a/test/nvmf/target/nvmf_vhost.sh +++ b/test/nvmf/target/nvmf_vhost.sh @@ -20,7 +20,7 @@ vhosttestinit nvmftestinit # Start Apps -"${NVMF_APP[@]}" -r $NVMF_SOCK & +"${NVMF_APP[@]}" -r $NVMF_SOCK -m 0x1 & nvmfpid=$! waitforlisten $nvmfpid $NVMF_SOCK @@ -28,7 +28,7 @@ trap 'process_shm --id $NVMF_APP_SHM_ID; nvmftestfini; exit 1' SIGINT SIGTERM EX mkdir -p "$(get_vhost_dir 3)" -"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" & +"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" -m 0x2 & vhostpid=$! waitforlisten $vhostpid $NVMF_SOCK diff --git a/test/nvmf/target/zcopy.sh b/test/nvmf/target/zcopy.sh index 602b40da5..deec471ee 100755 --- a/test/nvmf/target/zcopy.sh +++ b/test/nvmf/target/zcopy.sh @@ -7,7 +7,7 @@ source $rootdir/test/common/autotest_common.sh source $rootdir/test/nvmf/common.sh nvmftestinit -nvmfappstart +nvmfappstart -m 0x2 if [ "$TEST_TRANSPORT" != tcp ]; then echo "Unsupported transport: $TEST_TRANSPORT" diff --git a/test/sma/crypto.sh b/test/sma/crypto.sh index b0d8ffc3b..0ed9a7f14 100755 --- a/test/sma/crypto.sh +++ b/test/sma/crypto.sh @@ -140,10 +140,10 @@ verify_crypto_volume() { trap "cleanup; exit 1" SIGINT SIGTERM EXIT -"$rootdir/build/bin/spdk_tgt" & +"$rootdir/build/bin/spdk_tgt" -m 0x1 & hostpid=$! -"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" & +"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" -m 0x2 & tgtpid=$! $rootdir/scripts/sma.py -c <( diff --git a/test/sma/discovery.sh b/test/sma/discovery.sh index 8547450ab..81e0d1b79 100755 --- a/test/sma/discovery.sh +++ b/test/sma/discovery.sh @@ -129,13 +129,13 @@ function detach_volume() { trap "cleanup; exit 1" SIGINT SIGTERM EXIT # Start two remote targets -$rootdir/build/bin/spdk_tgt -r $t1sock & +$rootdir/build/bin/spdk_tgt -r $t1sock -m 0x1 & t1pid=$! -$rootdir/build/bin/spdk_tgt -r $t2sock & +$rootdir/build/bin/spdk_tgt -r $t2sock -m 0x2 & t2pid=$! # One target that the SMA will configure -$rootdir/build/bin/spdk_tgt & +$rootdir/build/bin/spdk_tgt -m 0x4 & tgtpid=$! # And finally the SMA itself diff --git a/test/vhost/fuzz/fuzz.sh b/test/vhost/fuzz/fuzz.sh index 7b77c5493..3145475b1 100755 --- a/test/vhost/fuzz/fuzz.sh +++ b/test/vhost/fuzz/fuzz.sh @@ -5,7 +5,10 @@ rootdir=$(readlink -f $(dirname $0))/../../.. source $rootdir/test/common/autotest_common.sh source "$rootdir/scripts/common.sh" -VHOST_APP+=(-p 0) +# This test fails, if second VHOST_FUZZ_APP call is run on +# a different core than VHOST_APP. This problem has been +# explained in GitHub issue #2732. +VHOST_APP+=(-p 0 --disable-cpumask-locks) FUZZ_RPC_SOCK="/var/tmp/spdk_fuzz.sock" VHOST_FUZZ_APP+=(-r "$FUZZ_RPC_SOCK" -g -s 256 --wait-for-rpc) diff --git a/test/vhost/initiator/blockdev.sh b/test/vhost/initiator/blockdev.sh index 9f4181318..082911841 100755 --- a/test/vhost/initiator/blockdev.sh +++ b/test/vhost/initiator/blockdev.sh @@ -19,7 +19,7 @@ function err_cleanup() { # start vhost and configure it trap 'err_cleanup; exit 1' SIGINT SIGTERM EXIT -$SPDK_BIN_DIR/vhost -m 0xf & +$SPDK_BIN_DIR/vhost -m 0xe & vhost_pid=$! waitforlisten $vhost_pid @@ -35,12 +35,12 @@ rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 0 Nvme0n1p0 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 1 Nvme0n1p1 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 2 Nvme0n1p2 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 3 Nvme0n1p3 -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xf" ]] +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xe" ]] -rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xf -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xf" ]] -rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x1 -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x1" ]] +rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xe +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xe" ]] +rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x4 +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x4" ]] rpc_cmd bdev_malloc_create 128 512 --name Malloc0 rpc_cmd vhost_create_scsi_controller naa.Malloc0.0 --cpumask 0x2 @@ -54,7 +54,7 @@ rpc_cmd vhost_scsi_controller_add_target naa.Malloc1.0 0 Malloc1 # start a dummy app, create vhost bdevs in it, then dump the config for FIO # Pre-allocate 1GB of memory for the application - virtio-user initiator requires it. See issue #2596. -$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 & +$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 -m 0x1 & dummy_spdk_pid=$! waitforlisten $dummy_spdk_pid /tmp/spdk2.sock rpc_cmd -s /tmp/spdk2.sock bdev_virtio_attach_controller --trtype user --traddr 'naa.Nvme0n1_scsi0.0' -d scsi --vq-count 8 'VirtioScsi0'