From 0af934b38c26272c2ab13e9c8bb514c71bf4d7ca Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Thu, 20 Oct 2022 13:21:23 +0000 Subject: [PATCH] event: add CPU lock files When running SPDK application on a given set of CPU cores, create lock files for each of them. This wil prevent user misconfiguration and assigning a core to more than one SPDK instance. The introduced mechanism is based on device locks implemented in spdk_pci_device_claim() function. Add a command line option to disable lock files. This feature will be useful in cases where differing CPU cores is impossible (eg. setup with only one core available). The patch also fixes all existing cases of overlapping core masks. Change-Id: Ie9aacb7523a3597b9aa20f2c3fa9efe4db92c44c Signed-off-by: Krzysztof Karas Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14919 Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris Reviewed-by: Tomasz Zawadzki Reviewed-by: Shuhei Matsumoto --- CHANGELOG.md | 7 ++ lib/event/app.c | 108 ++++++++++++++++++++++++++++++ scripts/setup.sh | 1 + test/iscsi_tgt/sock/sock.sh | 18 ++--- test/nvmf/host/bdevperf.sh | 2 +- test/nvmf/host/failover.sh | 2 +- test/nvmf/host/multicontroller.sh | 2 +- test/nvmf/target/nvmf_vhost.sh | 4 +- test/nvmf/target/zcopy.sh | 2 +- test/sma/crypto.sh | 4 +- test/sma/discovery.sh | 6 +- test/vhost/fuzz/fuzz.sh | 5 +- test/vhost/initiator/blockdev.sh | 14 ++-- 13 files changed, 147 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8618d8815..21dd549ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,13 @@ New `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy co A new API `spdk_bdev_io_get_submit_tsc` was added to get the submit_tsc of the bdev I/O. +### event + +Added core lock file mechanism to prevent the same CPU cores from being used by multiple +SPDK application instances. + +Added a command line switch to disable CPU locks on SPDK startup. + ## v22.09 ### accel diff --git a/lib/event/app.c b/lib/event/app.c index a9f377093..2aebcd0f4 100644 --- a/lib/event/app.c +++ b/lib/event/app.c @@ -31,6 +31,8 @@ #define SPDK_APP_DPDK_DEFAULT_BASE_VIRTADDR 0x200000000000 #define SPDK_APP_DEFAULT_CORE_LIMIT 0x140000000 /* 5 GiB */ +#define MAX_CPU_CORES 128 + struct spdk_app { const char *json_config_file; bool json_config_ignore_errors; @@ -49,6 +51,9 @@ static bool g_delay_subsystem_init = false; static bool g_shutdown_sig_received = false; static char *g_executable_name; static struct spdk_app_opts g_default_opts; +static bool g_disable_cpumask_locks = false; + +static int g_core_locks[MAX_CPU_CORES]; int spdk_app_get_shm_id(void) @@ -116,6 +121,8 @@ static const struct option g_cmdline_options[] = { {"base-virtaddr", required_argument, NULL, BASE_VIRTADDR_OPT_IDX}, #define ENV_CONTEXT_OPT_IDX 266 {"env-context", required_argument, NULL, ENV_CONTEXT_OPT_IDX}, +#define DISABLE_CPUMASK_LOCKS_OPT_IDX 267 + {"disable-cpumask-locks", no_argument, NULL, DISABLE_CPUMASK_LOCKS_OPT_IDX}, }; static void @@ -508,6 +515,86 @@ app_copy_opts(struct spdk_app_opts *opts, struct spdk_app_opts *opts_user, size_ #undef SET_FIELD } +static void +unclaim_cpu_cores(void) +{ + char core_name[40]; + uint32_t i; + + for (i = 0; i < MAX_CPU_CORES; i++) { + if (g_core_locks[i] != -1) { + snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", i); + close(g_core_locks[i]); + unlink(core_name); + } + } +} + +static int +claim_cpu_cores(uint32_t *failed_core) +{ + char core_name[40]; + int core_fd, pid; + int *core_map; + uint32_t core; + + struct flock core_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + + SPDK_ENV_FOREACH_CORE(core) { + snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", core); + core_fd = open(core_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (core_fd == -1) { + SPDK_ERRLOG("Could not open %s (%s).\n", core_name, spdk_strerror(errno)); + /* Return number of core we failed to claim. */ + goto error; + } + + if (ftruncate(core_fd, sizeof(int)) != 0) { + SPDK_ERRLOG("Could not truncate %s (%s).\n", core_name, spdk_strerror(errno)); + close(core_fd); + goto error; + } + + core_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED, core_fd, 0); + if (core_map == MAP_FAILED) { + SPDK_ERRLOG("Could not mmap core %s (%s).\n", core_name, spdk_strerror(errno)); + close(core_fd); + goto error; + } + + if (fcntl(core_fd, F_SETLK, &core_lock) != 0) { + pid = *core_map; + SPDK_ERRLOG("Cannot create lock on core %" PRIu32 ", probably process %d has claimed it.\n", + core, pid); + munmap(core_map, sizeof(int)); + close(core_fd); + goto error; + } + + /* We write the PID to the core lock file so that other processes trying + * to claim the same core will know what process is holding the lock. */ + *core_map = (int)getpid(); + munmap(core_map, sizeof(int)); + g_core_locks[core] = core_fd; + /* Keep core_fd open to maintain the lock. */ + } + + return 0; + +error: + if (failed_core != NULL) { + /* Set number of core we failed to claim. */ + *failed_core = core; + } + unclaim_cpu_cores(); + return -1; +} + int spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, void *arg1) @@ -518,6 +605,7 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, static bool g_env_was_setup = false; struct spdk_app_opts opts_local = {}; struct spdk_app_opts *opts = &opts_local; + uint32_t i; if (!opts_user) { SPDK_ERRLOG("opts_user should not be NULL\n"); @@ -583,6 +671,21 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn, } spdk_log_open(opts->log); + + /* Initialize each lock to -1 to indicate "empty" status */ + for (i = 0; i < MAX_CPU_CORES; i++) { + g_core_locks[i] = -1; + } + + if (!g_disable_cpumask_locks) { + if (claim_cpu_cores(NULL)) { + SPDK_ERRLOG("Unable to acquire lock on assigned core mask - exiting.\n"); + return 1; + } + } else { + SPDK_NOTICELOG("CPU core locks deactivated.\n"); + } + SPDK_NOTICELOG("Total cores available: %d\n", spdk_env_get_core_count()); if ((rc = spdk_reactors_init(opts->msg_mempool_size)) != 0) { @@ -639,6 +742,7 @@ spdk_app_fini(void) spdk_reactors_fini(); spdk_env_fini(); spdk_log_close(); + unclaim_cpu_cores(); } static void @@ -718,6 +822,7 @@ usage(void (*app_usage)(void)) { printf("%dMB)\n", g_default_opts.mem_size >= 0 ? g_default_opts.mem_size : 0); } + printf(" --disable-cpumask-locks Disable CPU core lock files.\n"); printf(" --silence-noticelog disable notice level logging to stderr\n"); printf(" -u, --no-pci disable PCI access\n"); printf(" --wait-for-rpc wait for RPCs to initialize subsystems\n"); @@ -843,6 +948,9 @@ spdk_app_parse_args(int argc, char **argv, struct spdk_app_opts *opts, case CPUMASK_OPT_IDX: opts->reactor_mask = optarg; break; + case DISABLE_CPUMASK_LOCKS_OPT_IDX: + g_disable_cpumask_locks = true; + break; case MEM_CHANNELS_OPT_IDX: opts->mem_channel = spdk_strtol(optarg, 0); if (opts->mem_channel < 0) { diff --git a/scripts/setup.sh b/scripts/setup.sh index 8b0fde7ec..a1b20a048 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -422,6 +422,7 @@ function cleanup_linux() { files_to_clean+=("$dir/"*) done file_locks+=(/var/tmp/spdk_pci_lock*) + file_locks+=(/var/tmp/spdk_cpu_lock*) files_to_clean+=(/dev/shm/@(@($match_spdk)_trace|spdk_iscsi_conns)*) files_to_clean+=("${file_locks[@]}") diff --git a/test/iscsi_tgt/sock/sock.sh b/test/iscsi_tgt/sock/sock.sh index 6d28a1170..8e08bcaec 100755 --- a/test/iscsi_tgt/sock/sock.sh +++ b/test/iscsi_tgt/sock/sock.sh @@ -119,39 +119,39 @@ timing_enter sock_ssl_server echo "Testing SSL server path" # start echo server using hello_sock echo server -$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK & +$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK -m 0x1 & server_pid=$! trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT waitforlisten $server_pid # send message using hello_sock client message="**MESSAGE:This is a test message from the hello_sock client with ssl**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using TLS 1.3 message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.3**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13 -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using TLS 1.2 message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.2**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12 -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi # send message using hello_sock client using incorrect TLS 7 message="**MESSAGE:This is a test message from the hello_sock client with ssl using incorrect TLS 7**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 -m 0x2 && exit 1 # send message using hello_sock client with KTLS disabled message="**MESSAGE:This is a test message from the hello_sock client with KTLS disabled**" -response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k) +response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k -m 0x2) if ! echo "$response" | grep -q "$message"; then exit 1 fi @@ -187,11 +187,11 @@ fi # send message using hello_sock client with unmatching PSK KEY, expect a failure message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 -m 0x2 && exit 1 # send message using hello_sock client with unmatching PSK IDENTITY, expect a failure message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**" -echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID && exit 1 +echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID -m 0x2 && exit 1 trap '-' SIGINT SIGTERM EXIT # NOTE: socat returns code 143 on SIGINT @@ -206,7 +206,7 @@ timing_exit sock_ssl_server timing_enter sock_server # start echo server using hello_sock echo server -$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" & +$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" -m 0x1 & server_pid=$! trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT waitforlisten $server_pid diff --git a/test/nvmf/host/bdevperf.sh b/test/nvmf/host/bdevperf.sh index a166eac24..2e35effef 100755 --- a/test/nvmf/host/bdevperf.sh +++ b/test/nvmf/host/bdevperf.sh @@ -9,7 +9,7 @@ MALLOC_BDEV_SIZE=64 MALLOC_BLOCK_SIZE=512 function tgt_init() { - nvmfappstart -m 0xF + nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0 diff --git a/test/nvmf/host/failover.sh b/test/nvmf/host/failover.sh index eebfd4c2d..d8ebe4f4c 100755 --- a/test/nvmf/host/failover.sh +++ b/test/nvmf/host/failover.sh @@ -14,7 +14,7 @@ bdevperf_rpc_sock=/var/tmp/bdevperf.sock nvmftestinit -nvmfappstart -m 0xF +nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 $rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0 diff --git a/test/nvmf/host/multicontroller.sh b/test/nvmf/host/multicontroller.sh index 8c1ff2b5e..d3219b337 100755 --- a/test/nvmf/host/multicontroller.sh +++ b/test/nvmf/host/multicontroller.sh @@ -19,7 +19,7 @@ fi nvmftestinit -nvmfappstart -m 0xF +nvmfappstart -m 0xE $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192 diff --git a/test/nvmf/target/nvmf_vhost.sh b/test/nvmf/target/nvmf_vhost.sh index 89b115927..18275eab3 100755 --- a/test/nvmf/target/nvmf_vhost.sh +++ b/test/nvmf/target/nvmf_vhost.sh @@ -20,7 +20,7 @@ vhosttestinit nvmftestinit # Start Apps -"${NVMF_APP[@]}" -r $NVMF_SOCK & +"${NVMF_APP[@]}" -r $NVMF_SOCK -m 0x1 & nvmfpid=$! waitforlisten $nvmfpid $NVMF_SOCK @@ -28,7 +28,7 @@ trap 'process_shm --id $NVMF_APP_SHM_ID; nvmftestfini; exit 1' SIGINT SIGTERM EX mkdir -p "$(get_vhost_dir 3)" -"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" & +"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" -m 0x2 & vhostpid=$! waitforlisten $vhostpid $NVMF_SOCK diff --git a/test/nvmf/target/zcopy.sh b/test/nvmf/target/zcopy.sh index 602b40da5..deec471ee 100755 --- a/test/nvmf/target/zcopy.sh +++ b/test/nvmf/target/zcopy.sh @@ -7,7 +7,7 @@ source $rootdir/test/common/autotest_common.sh source $rootdir/test/nvmf/common.sh nvmftestinit -nvmfappstart +nvmfappstart -m 0x2 if [ "$TEST_TRANSPORT" != tcp ]; then echo "Unsupported transport: $TEST_TRANSPORT" diff --git a/test/sma/crypto.sh b/test/sma/crypto.sh index b0d8ffc3b..0ed9a7f14 100755 --- a/test/sma/crypto.sh +++ b/test/sma/crypto.sh @@ -140,10 +140,10 @@ verify_crypto_volume() { trap "cleanup; exit 1" SIGINT SIGTERM EXIT -"$rootdir/build/bin/spdk_tgt" & +"$rootdir/build/bin/spdk_tgt" -m 0x1 & hostpid=$! -"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" & +"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" -m 0x2 & tgtpid=$! $rootdir/scripts/sma.py -c <( diff --git a/test/sma/discovery.sh b/test/sma/discovery.sh index 8547450ab..81e0d1b79 100755 --- a/test/sma/discovery.sh +++ b/test/sma/discovery.sh @@ -129,13 +129,13 @@ function detach_volume() { trap "cleanup; exit 1" SIGINT SIGTERM EXIT # Start two remote targets -$rootdir/build/bin/spdk_tgt -r $t1sock & +$rootdir/build/bin/spdk_tgt -r $t1sock -m 0x1 & t1pid=$! -$rootdir/build/bin/spdk_tgt -r $t2sock & +$rootdir/build/bin/spdk_tgt -r $t2sock -m 0x2 & t2pid=$! # One target that the SMA will configure -$rootdir/build/bin/spdk_tgt & +$rootdir/build/bin/spdk_tgt -m 0x4 & tgtpid=$! # And finally the SMA itself diff --git a/test/vhost/fuzz/fuzz.sh b/test/vhost/fuzz/fuzz.sh index 7b77c5493..3145475b1 100755 --- a/test/vhost/fuzz/fuzz.sh +++ b/test/vhost/fuzz/fuzz.sh @@ -5,7 +5,10 @@ rootdir=$(readlink -f $(dirname $0))/../../.. source $rootdir/test/common/autotest_common.sh source "$rootdir/scripts/common.sh" -VHOST_APP+=(-p 0) +# This test fails, if second VHOST_FUZZ_APP call is run on +# a different core than VHOST_APP. This problem has been +# explained in GitHub issue #2732. +VHOST_APP+=(-p 0 --disable-cpumask-locks) FUZZ_RPC_SOCK="/var/tmp/spdk_fuzz.sock" VHOST_FUZZ_APP+=(-r "$FUZZ_RPC_SOCK" -g -s 256 --wait-for-rpc) diff --git a/test/vhost/initiator/blockdev.sh b/test/vhost/initiator/blockdev.sh index 9f4181318..082911841 100755 --- a/test/vhost/initiator/blockdev.sh +++ b/test/vhost/initiator/blockdev.sh @@ -19,7 +19,7 @@ function err_cleanup() { # start vhost and configure it trap 'err_cleanup; exit 1' SIGINT SIGTERM EXIT -$SPDK_BIN_DIR/vhost -m 0xf & +$SPDK_BIN_DIR/vhost -m 0xe & vhost_pid=$! waitforlisten $vhost_pid @@ -35,12 +35,12 @@ rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 0 Nvme0n1p0 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 1 Nvme0n1p1 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 2 Nvme0n1p2 rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 3 Nvme0n1p3 -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xf" ]] +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xe" ]] -rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xf -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xf" ]] -rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x1 -[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x1" ]] +rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xe +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xe" ]] +rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x4 +[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x4" ]] rpc_cmd bdev_malloc_create 128 512 --name Malloc0 rpc_cmd vhost_create_scsi_controller naa.Malloc0.0 --cpumask 0x2 @@ -54,7 +54,7 @@ rpc_cmd vhost_scsi_controller_add_target naa.Malloc1.0 0 Malloc1 # start a dummy app, create vhost bdevs in it, then dump the config for FIO # Pre-allocate 1GB of memory for the application - virtio-user initiator requires it. See issue #2596. -$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 & +$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 -m 0x1 & dummy_spdk_pid=$! waitforlisten $dummy_spdk_pid /tmp/spdk2.sock rpc_cmd -s /tmp/spdk2.sock bdev_virtio_attach_controller --trtype user --traddr 'naa.Nvme0n1_scsi0.0' -d scsi --vq-count 8 'VirtioScsi0'