event: add CPU lock files

When running an SPDK application on a given set of
CPU cores, create a lock file for each of them.
This will prevent user misconfiguration, such as
assigning the same core to more than one SPDK instance.

The introduced mechanism is based on the device locks
implemented in the spdk_pci_device_claim() function.
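
The claim itself is an advisory POSIX record lock, with the
holder's PID stored in the lock file for diagnostics. A minimal
standalone sketch of the pattern (illustrative only; claim_resource
is a hypothetical name, not an SPDK API):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* Claim exclusive ownership of a resource via a lock file. The
 * fcntl() lock disappears automatically when the holder exits, so a
 * lock file left behind by a crashed process is harmless. */
static int
claim_resource(const char *path)
{
	/* l_start = 0 and l_len = 0 lock the entire file. */
	struct flock lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
	int fd, *pid_map;

	fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (fd == -1) {
		return -1;
	}
	if (ftruncate(fd, sizeof(int)) != 0) {
		close(fd);
		return -1;
	}
	pid_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (pid_map == MAP_FAILED) {
		close(fd);
		return -1;
	}
	if (fcntl(fd, F_SETLK, &lock) != 0) {
		/* Another process holds the lock; its PID is in the file. */
		fprintf(stderr, "resource claimed by pid %d\n", *pid_map);
		munmap(pid_map, sizeof(int));
		close(fd);
		return -1;
	}
	*pid_map = (int)getpid();
	munmap(pid_map, sizeof(int));
	/* Keep fd open for as long as the claim should hold. */
	return fd;
}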

Add a command line option (--disable-cpumask-locks) to
disable the lock files. This is useful in cases where
assigning distinct CPU cores to each instance is
impossible (e.g. a setup with only one core available).

The patch also fixes all existing cases of overlapping
core masks in the test scripts, so that co-running SPDK
instances now use disjoint cpumasks.

Change-Id: Ie9aacb7523a3597b9aa20f2c3fa9efe4db92c44c
Signed-off-by: Krzysztof Karas <krzysztof.karas@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14919
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Krzysztof Karas 2022-10-20 13:21:23 +00:00 committed by Tomasz Zawadzki
parent 311d014d17
commit 0af934b38c
13 changed files with 147 additions and 28 deletions

View File

@@ -27,6 +27,13 @@ New `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy co
A new API `spdk_bdev_io_get_submit_tsc` was added to get the submit_tsc of the bdev I/O.
### event
Added a core lock file mechanism to prevent the same CPU cores from being used by multiple
SPDK application instances.
Added a command line switch to disable CPU locks on SPDK startup.
## v22.09
### accel

View File

@@ -31,6 +31,8 @@
#define SPDK_APP_DPDK_DEFAULT_BASE_VIRTADDR 0x200000000000
#define SPDK_APP_DEFAULT_CORE_LIMIT 0x140000000 /* 5 GiB */
#define MAX_CPU_CORES 128
struct spdk_app {
const char *json_config_file;
bool json_config_ignore_errors;
@@ -49,6 +51,9 @@ static bool g_delay_subsystem_init = false;
static bool g_shutdown_sig_received = false;
static char *g_executable_name;
static struct spdk_app_opts g_default_opts;
static bool g_disable_cpumask_locks = false;
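/* Holds one lock file descriptor per core; -1 marks an unclaimed slot. */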
static int g_core_locks[MAX_CPU_CORES];
int
spdk_app_get_shm_id(void)
@@ -116,6 +121,8 @@ static const struct option g_cmdline_options[] = {
{"base-virtaddr", required_argument, NULL, BASE_VIRTADDR_OPT_IDX},
#define ENV_CONTEXT_OPT_IDX 266
{"env-context", required_argument, NULL, ENV_CONTEXT_OPT_IDX},
#define DISABLE_CPUMASK_LOCKS_OPT_IDX 267
{"disable-cpumask-locks", no_argument, NULL, DISABLE_CPUMASK_LOCKS_OPT_IDX},
};
static void
@@ -508,6 +515,86 @@ app_copy_opts(struct spdk_app_opts *opts, struct spdk_app_opts *opts_user, size_
#undef SET_FIELD
}
static void
unclaim_cpu_cores(void)
{
char core_name[40];
uint32_t i;
for (i = 0; i < MAX_CPU_CORES; i++) {
if (g_core_locks[i] != -1) {
snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", i);
close(g_core_locks[i]);
unlink(core_name);
}
}
}
static int
claim_cpu_cores(uint32_t *failed_core)
{
char core_name[40];
int core_fd, pid;
int *core_map;
uint32_t core;
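/* With l_start = 0 and l_len = 0 the write lock covers the whole file. */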
struct flock core_lock = {
.l_type = F_WRLCK,
.l_whence = SEEK_SET,
.l_start = 0,
.l_len = 0,
};
SPDK_ENV_FOREACH_CORE(core) {
snprintf(core_name, sizeof(core_name), "/var/tmp/spdk_cpu_lock_%03d", core);
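/* O_CREAT without O_EXCL: reopening a stale file left by a crashed
 * process is safe, since its fcntl() lock was released at exit. */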
core_fd = open(core_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (core_fd == -1) {
SPDK_ERRLOG("Could not open %s (%s).\n", core_name, spdk_strerror(errno));
/* Return number of core we failed to claim. */
goto error;
}
if (ftruncate(core_fd, sizeof(int)) != 0) {
SPDK_ERRLOG("Could not truncate %s (%s).\n", core_name, spdk_strerror(errno));
close(core_fd);
goto error;
}
core_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED, core_fd, 0);
if (core_map == MAP_FAILED) {
SPDK_ERRLOG("Could not mmap core %s (%s).\n", core_name, spdk_strerror(errno));
close(core_fd);
goto error;
}
if (fcntl(core_fd, F_SETLK, &core_lock) != 0) {
pid = *core_map;
SPDK_ERRLOG("Cannot create lock on core %" PRIu32 ", probably process %d has claimed it.\n",
core, pid);
munmap(core_map, sizeof(int));
close(core_fd);
goto error;
}
/* We write the PID to the core lock file so that other processes trying
* to claim the same core will know what process is holding the lock. */
*core_map = (int)getpid();
munmap(core_map, sizeof(int));
g_core_locks[core] = core_fd;
/* Keep core_fd open to maintain the lock. */
}
return 0;
error:
if (failed_core != NULL) {
/* Set number of core we failed to claim. */
*failed_core = core;
}
unclaim_cpu_cores();
return -1;
}
int
spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn,
void *arg1)
@@ -518,6 +605,7 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn,
static bool g_env_was_setup = false;
struct spdk_app_opts opts_local = {};
struct spdk_app_opts *opts = &opts_local;
uint32_t i;
if (!opts_user) {
SPDK_ERRLOG("opts_user should not be NULL\n");
@@ -583,6 +671,21 @@ spdk_app_start(struct spdk_app_opts *opts_user, spdk_msg_fn start_fn,
}
spdk_log_open(opts->log);
/* Initialize each lock to -1 to indicate "empty" status */
for (i = 0; i < MAX_CPU_CORES; i++) {
g_core_locks[i] = -1;
}
if (!g_disable_cpumask_locks) {
if (claim_cpu_cores(NULL)) {
SPDK_ERRLOG("Unable to acquire lock on assigned core mask - exiting.\n");
return 1;
}
} else {
SPDK_NOTICELOG("CPU core locks deactivated.\n");
}
SPDK_NOTICELOG("Total cores available: %d\n", spdk_env_get_core_count());
if ((rc = spdk_reactors_init(opts->msg_mempool_size)) != 0) {
@@ -639,6 +742,7 @@ spdk_app_fini(void)
spdk_reactors_fini();
spdk_env_fini();
spdk_log_close();
unclaim_cpu_cores();
}
static void
@@ -718,6 +822,7 @@ usage(void (*app_usage)(void))
{
printf("%dMB)\n", g_default_opts.mem_size >= 0 ? g_default_opts.mem_size : 0);
}
printf(" --disable-cpumask-locks Disable CPU core lock files.\n");
printf(" --silence-noticelog disable notice level logging to stderr\n");
printf(" -u, --no-pci disable PCI access\n");
printf(" --wait-for-rpc wait for RPCs to initialize subsystems\n");
@@ -843,6 +948,9 @@ spdk_app_parse_args(int argc, char **argv, struct spdk_app_opts *opts,
case CPUMASK_OPT_IDX:
opts->reactor_mask = optarg;
break;
case DISABLE_CPUMASK_LOCKS_OPT_IDX:
g_disable_cpumask_locks = true;
break;
case MEM_CHANNELS_OPT_IDX:
opts->mem_channel = spdk_strtol(optarg, 0);
if (opts->mem_channel < 0) {

View File

@@ -422,6 +422,7 @@ function cleanup_linux() {
files_to_clean+=("$dir/"*)
done
file_locks+=(/var/tmp/spdk_pci_lock*)
file_locks+=(/var/tmp/spdk_cpu_lock*)
files_to_clean+=(/dev/shm/@(@($match_spdk)_trace|spdk_iscsi_conns)*)
files_to_clean+=("${file_locks[@]}")

View File

@@ -119,39 +119,39 @@ timing_enter sock_ssl_server
echo "Testing SSL server path"
# start echo server using hello_sock echo server
$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK &
$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S $PSK -m 0x1 &
server_pid=$!
trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT
waitforlisten $server_pid
# send message using hello_sock client
message="**MESSAGE:This is a test message from the hello_sock client with ssl**"
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK)
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -m 0x2)
if ! echo "$response" | grep -q "$message"; then
exit 1
fi
# send message using hello_sock client using TLS 1.3
message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.3**"
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13)
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 13 -m 0x2)
if ! echo "$response" | grep -q "$message"; then
exit 1
fi
# send message using hello_sock client using TLS 1.2
message="**MESSAGE:This is a test message from the hello_sock client with ssl using TLS 1.2**"
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12)
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 12 -m 0x2)
if ! echo "$response" | grep -q "$message"; then
exit 1
fi
# send message using hello_sock client using incorrect TLS 7
message="**MESSAGE:This is a test message from the hello_sock client with ssl using incorrect TLS 7**"
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 && exit 1
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -T 7 -m 0x2 && exit 1
# send message using hello_sock client with KTLS disabled
message="**MESSAGE:This is a test message from the hello_sock client with KTLS disabled**"
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k)
response=$(echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -k -m 0x2)
if ! echo "$response" | grep -q "$message"; then
exit 1
fi
@@ -187,11 +187,11 @@ fi
# send message using hello_sock client with unmatching PSK KEY, expect a failure
message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**"
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 && exit 1
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -E 4321DEADBEEF1234 -m 0x2 && exit 1
# send message using hello_sock client with unmatching PSK IDENTITY, expect a failure
message="**MESSAGE:This is a test message from the hello_sock client with unmatching psk_key**"
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID && exit 1
echo $message | $HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT $PSK -I WRONG_PSK_ID -m 0x2 && exit 1
trap '-' SIGINT SIGTERM EXIT
# NOTE: socat returns code 143 on SIGINT
@@ -206,7 +206,7 @@ timing_exit sock_ssl_server
timing_enter sock_server
# start echo server using hello_sock echo server
$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" &
$HELLO_SOCK_APP -H $TARGET_IP -P $ISCSI_PORT -S -N "posix" -m 0x1 &
server_pid=$!
trap 'killprocess $server_pid; iscsitestfini; exit 1' SIGINT SIGTERM EXIT
waitforlisten $server_pid

View File

@@ -9,7 +9,7 @@ MALLOC_BDEV_SIZE=64
MALLOC_BLOCK_SIZE=512
function tgt_init() {
nvmfappstart -m 0xF
nvmfappstart -m 0xE
$rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192
$rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0

View File

@@ -14,7 +14,7 @@ bdevperf_rpc_sock=/var/tmp/bdevperf.sock
nvmftestinit
nvmfappstart -m 0xF
nvmfappstart -m 0xE
$rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192
$rpc_py bdev_malloc_create $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE -b Malloc0

View File

@@ -19,7 +19,7 @@ fi
nvmftestinit
nvmfappstart -m 0xF
nvmfappstart -m 0xE
$rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192

View File

@@ -20,7 +20,7 @@ vhosttestinit
nvmftestinit
# Start Apps
"${NVMF_APP[@]}" -r $NVMF_SOCK &
"${NVMF_APP[@]}" -r $NVMF_SOCK -m 0x1 &
nvmfpid=$!
waitforlisten $nvmfpid $NVMF_SOCK
@@ -28,7 +28,7 @@ trap 'process_shm --id $NVMF_APP_SHM_ID; nvmftestfini; exit 1' SIGINT SIGTERM EX
mkdir -p "$(get_vhost_dir 3)"
"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" &
"${VHOST_APP[@]}" -S "$(get_vhost_dir 3)" -m 0x2 &
vhostpid=$!
waitforlisten $vhostpid $NVMF_SOCK

View File

@@ -7,7 +7,7 @@ source $rootdir/test/common/autotest_common.sh
source $rootdir/test/nvmf/common.sh
nvmftestinit
nvmfappstart
nvmfappstart -m 0x2
if [ "$TEST_TRANSPORT" != tcp ]; then
echo "Unsupported transport: $TEST_TRANSPORT"

View File

@@ -140,10 +140,10 @@ verify_crypto_volume() {
trap "cleanup; exit 1" SIGINT SIGTERM EXIT
"$rootdir/build/bin/spdk_tgt" &
"$rootdir/build/bin/spdk_tgt" -m 0x1 &
hostpid=$!
"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" &
"$rootdir/build/bin/spdk_tgt" -r "$tgtsock" -m 0x2 &
tgtpid=$!
$rootdir/scripts/sma.py -c <(

View File

@@ -129,13 +129,13 @@ function detach_volume() {
trap "cleanup; exit 1" SIGINT SIGTERM EXIT
# Start two remote targets
$rootdir/build/bin/spdk_tgt -r $t1sock &
$rootdir/build/bin/spdk_tgt -r $t1sock -m 0x1 &
t1pid=$!
$rootdir/build/bin/spdk_tgt -r $t2sock &
$rootdir/build/bin/spdk_tgt -r $t2sock -m 0x2 &
t2pid=$!
# One target that the SMA will configure
$rootdir/build/bin/spdk_tgt &
$rootdir/build/bin/spdk_tgt -m 0x4 &
tgtpid=$!
# And finally the SMA itself

View File

@@ -5,7 +5,10 @@ rootdir=$(readlink -f $(dirname $0))/../../..
source $rootdir/test/common/autotest_common.sh
source "$rootdir/scripts/common.sh"
VHOST_APP+=(-p 0)
# This test fails if the second VHOST_FUZZ_APP call is run on
# a different core than VHOST_APP. This problem has been
# explained in GitHub issue #2732.
VHOST_APP+=(-p 0 --disable-cpumask-locks)
FUZZ_RPC_SOCK="/var/tmp/spdk_fuzz.sock"
VHOST_FUZZ_APP+=(-r "$FUZZ_RPC_SOCK" -g -s 256 --wait-for-rpc)

View File

@@ -19,7 +19,7 @@ function err_cleanup() {
# start vhost and configure it
trap 'err_cleanup; exit 1' SIGINT SIGTERM EXIT
$SPDK_BIN_DIR/vhost -m 0xf &
$SPDK_BIN_DIR/vhost -m 0xe &
vhost_pid=$!
waitforlisten $vhost_pid
@@ -35,12 +35,12 @@ rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 0 Nvme0n1p0
rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 1 Nvme0n1p1
rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 2 Nvme0n1p2
rpc_cmd vhost_scsi_controller_add_target naa.Nvme0n1_scsi0.0 3 Nvme0n1p3
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xf" ]]
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_scsi0.0 | jq -r '.[].cpumask')" == "0xe" ]]
rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xf
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xf" ]]
rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x1
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x1" ]]
rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 --cpumask 0xe
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk0.0 | jq -r '.[].cpumask')" == "0xe" ]]
rpc_cmd vhost_create_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 --cpumask 0x4
[[ "$(rpc_cmd vhost_get_controllers -n naa.Nvme0n1_blk1.0 | jq -r '.[].cpumask')" == "0x4" ]]
rpc_cmd bdev_malloc_create 128 512 --name Malloc0
rpc_cmd vhost_create_scsi_controller naa.Malloc0.0 --cpumask 0x2
@@ -54,7 +54,7 @@ rpc_cmd vhost_scsi_controller_add_target naa.Malloc1.0 0 Malloc1
# start a dummy app, create vhost bdevs in it, then dump the config for FIO
# Pre-allocate 1GB of memory for the application - virtio-user initiator requires it. See issue #2596.
$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 &
$SPDK_BIN_DIR/spdk_tgt -r /tmp/spdk2.sock -g -s 1024 -m 0x1 &
dummy_spdk_pid=$!
waitforlisten $dummy_spdk_pid /tmp/spdk2.sock
rpc_cmd -s /tmp/spdk2.sock bdev_virtio_attach_controller --trtype user --traddr 'naa.Nvme0n1_scsi0.0' -d scsi --vq-count 8 'VirtioScsi0'