From b06212cfb56cb73532c078843ab8445715df88fb Mon Sep 17 00:00:00 2001
From: Karol Latecki
Date: Tue, 23 Feb 2021 19:29:58 +0100
Subject: [PATCH] scripts/nvmf_perf: re-write nvmf perf readme file

Readme was frequently missed when adding new stuff or updating the
performance script, and it is out of date.

Signed-off-by: Karol Latecki
Change-Id: I1710473be576ffbcc2fa8e3701b196bd46cf6654
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6538
Community-CI: Broadcom CI
Tested-by: SPDK CI Jenkins
Reviewed-by: Tomasz Zawadzki
Reviewed-by: Jim Harris
---
 scripts/perf/nvmf/README.md   | 447 +++++++++++++++++++++++-----------
 scripts/perf/nvmf/config.json |  16 +-
 2 files changed, 314 insertions(+), 149 deletions(-)

diff --git a/scripts/perf/nvmf/README.md b/scripts/perf/nvmf/README.md
index 63106494d..70f12ad4d 100644
--- a/scripts/perf/nvmf/README.md
+++ b/scripts/perf/nvmf/README.md
@@ -1,204 +1,367 @@
-## Running NVMe-OF Performace Testcases
+# Running NVMe-OF Performance Test Cases

-In order to reproduce test cases described in [SPDK NVMe-OF Performance Test Cases](https://ci.spdk.io/download/performance-reports/SPDK_nvmeof_perf_report_18.04.pdf) follow the following instructions.
+Scripts contained in this directory are used to run TCP and RDMA benchmark tests,
+which are later published in the [spdk.io performance reports section](https://spdk.io/doc/performance_reports.html).
+To run the scripts in your environment please follow the steps below.

-Currently RDMA NIC IP address assignment must be done manually before running the tests.
+## Test Systems Requirements

-# Prepare the configuration file
+- The OS installed on the test systems must be a Linux OS.
+  Scripts were primarily used on systems with Fedora and
+  Ubuntu 18.04/20.04 distributions.
+- Each test system must have at least one RDMA-capable NIC installed for RDMA tests.
+  For TCP tests any TCP-capable NIC will do. However, high-bandwidth,
+  high-performance NICs like Intel E810 CQDA2 or Mellanox ConnectX-5 are
+  suggested because the NVMe-oF workload is network bound, so a NIC capable
+  of less than 100Gbps on the NVMe-oF target system will quickly become
+  saturated and limit the results.
+- Python3 interpreter must be available on all test systems.
+  Paramiko and Pandas modules must be installed.
+- nvmecli package must be installed on all test systems.
+- fio must be downloaded from [Github](https://github.com/axboe/fio) and built.
+  This must be done on Initiator test systems to later build SPDK with the
+  "--with-fio" option (an example build sketch is shown at the end of this chapter).
+- All test systems must have a user account with a common name,
+  password and passwordless sudo enabled.
+- [mlnx-tools](https://github.com/Mellanox/mlnx-tools) package must be downloaded
+  to the /usr/src/local directory in order to configure NIC ports IRQ affinity.
+  If a custom directory is to be used, then it must be set using the irq_scripts_dir
+  option in the Target and Initiator configuration sections.

-Configure the target, initiators, and FIO workload in the json configuration file.
+### Optional

-## General
+- For tests using the Kernel Target, nvmet-cli must be downloaded and built on the Target system.
+  nvmet-cli is available [here](http://git.infradead.org/users/hch/nvmetcli.git).

-Options which apply to both target and all initiator servers such as "password" and "username" fields.
-All servers are required to have the same user credentials for running the test.
-Test results can be found in /tmp/results directory.
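
+For illustration, preparing the fio sources on an Initiator system could look
+roughly like the sketch below. The paths are only examples (the scripts default
+to /usr/src/fio/fio), and the SPDK build shown last is normally performed by
+run_nvmf.py itself unless skip_spdk_install is set to true:
+
+``` ~sh
+# Get and build fio - the sources are later needed for the SPDK bdev fio_plugin
+git clone https://github.com/axboe/fio /usr/src/fio
+cd /usr/src/fio
+./configure && make
+
+# Optional, only when skip_spdk_install is used: build SPDK against those fio sources
+# (--with-rdma is only needed for RDMA transport tests)
+git clone https://github.com/spdk/spdk /usr/src/spdk
+cd /usr/src/spdk
+git submodule update --init
+./configure --with-fio=/usr/src/fio --with-rdma
+make
+```
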
+## Manual configuration

-### transport

+Before running the scripts, some manual configuration of the test systems is required:

-Transport layer to use between Target and Initiator servers - rdma or tcp.

+- Configure IP address assignment on the NIC ports that will be used for the test.
+  Make sure to make these assignments persistent, as in some cases NIC drivers may be reloaded.
+- Adjust the firewall service to allow traffic on the IP/port pairs used in the test
+  (or disable the firewall service completely if possible).
+- Adjust or completely disable local security engines like AppArmor or SELinux.

-## Target

+## JSON configuration for test run automation

-Configure the target server information.

+An example json configuration file with the minimum configuration required
+to automate NVMe-oF testing is provided in this repository.
+The following sub-chapters describe each configuration section in more detail.

-### nic_ips

+### General settings section

-List of IP addresses othat will be used in this test..
-NVMe namespaces will be split between provided IP addresses.
-So for example providing 2 IP's with 16 NVMe drives present will result in each IP managing
-8 NVMe subystems.

+``` ~sh
+"general": {
+    "username": "user",
+    "password": "password",
+    "transport": "transport_type",
+    "skip_spdk_install": bool
+}
+```

-### mode

+Required:

-"spdk" or "kernel" values allowed.

+- username - username for the SSH session
+- password - password for the SSH session
+- transport - transport layer to be used throughout the test ("tcp" or "rdma")

-### null_block_devices

+Optional:

-Integer. Use null block devices instead of present NVMe drives.
-If set to 1, can be used for latency measurements as described in Test Case 3 of performance report.

+- skip_spdk_install - by default SPDK sources will be copied from the Target
+  to the Initiator systems each time the run_nvmf.py script is run. If SPDK
+  is already in place on the Initiator systems and there's no need to re-build it,
+  then set this option to true.
+  Default: false.

-### null_block_dif_type

+### Target System Configuration

-Integer. Enable data protection on created null block device. Defaults to 0 if option
-not present in JSON configuration file. See doc/jsonrpc.md "bdev_null_create" for details.

+``` ~sh
+"target": {
+    "mode": "spdk",
+    "nic_ips": ["192.0.1.1", "192.0.2.1"],
+    "core_mask": "[1-10]",
+    "null_block_devices": 8,
+    "nvmet_bin": "/path/to/nvmetcli",
+    "sar_settings": [true, 30, 1, 60],
+    "pcm_settings": ["/tmp/pcm", 30, 1, 60],
+    "enable_bandwidth": [true, 60],
+    "enable_dpdk_memory": [true, 30],
+    "num_shared_buffers": 4096,
+    "scheduler_settings": "static",
+    "zcopy_settings": false,
+    "dif_insert_strip": true,
+    "null_block_dif_type": 3
+}
+```

-### core_mask

+Required:

-List of CPU cores to assign for running SPDK NVMe-OF Target process. Can specify exact core numbers or ranges, eg:
-[0, 1, 10-15].

+- mode - Target application mode, "spdk" or "kernel".
+- nic_ips - IP addresses of NIC ports to be used by the target to export
+  NVMe-oF subsystems.
+- core_mask - Used by SPDK target only.
+  CPU core mask, either in the form of an actual mask (e.g. 0xAAAA) or a core list
+  (e.g. [0,1,2-5,6]).
+  At this moment the scripts cannot restrict the Kernel target to only
+  use certain CPU cores. Important: upper bound of the range is inclusive!

-### nvmet_bin

+Optional, common:

-Path to nvmetcli application executable. If not provided then system-wide package will be used
-by default. Not used if "mode" is set to "spdk".
+- null_block_devices - int, number of null block devices to create.
+  Detected NVMe devices are not used if this option is present. Default: 0.
+- sar_settings - [bool, int(x), int(y), int(z)];
+  Enable SAR CPU utilization measurement on the Target side.
+  Wait for "x" seconds before starting measurements, then do "z" samples
+  with "y" second intervals between them. Default: disabled.
+- pcm_settings - [path, int(x), int(y), int(z)];
+  Enable [PCM](https://github.com/opcm/pcm.git) measurements on the Target side.
+  Measurements include CPU, memory and power consumption. "path" points to a
+  directory where pcm executables are present. Default: disabled.
+- enable_bandwidth - [bool, int]. Wait a given number of seconds and run
+  bwm-ng until the end of the test to measure bandwidth utilization on network
+  interfaces. Default: disabled.
+- tuned_profile - tunedadm profile to apply on the system before starting
+  the test.
+- adq_enable - bool; only for TCP transport.
+  Configure system modules, NIC settings and create priority traffic classes
+  for ADQ testing. You need an ADQ-capable NIC like the Intel E810.
+- irq_scripts_dir - path to the scripts directory of the Mellanox mlnx-tools package;
+  used to run the set_irq_affinity.sh script.
+  Default: /usr/src/local/mlnx-tools/ofed_scripts

-### num_shared_buffers

+Optional, Kernel Target only:

-Number of shared buffers to use when creating transport layer.

+- nvmet_bin - path to nvmetcli binary, if not available in $PATH.
+  Only for Kernel Target. Default: "nvmetcli".

-### dif_insert_strip

+Optional, SPDK Target only:

-Boolean. If set to true - enable "dif_insert_or_strip" option for TCP transport layer.

+- zcopy_settings - bool. Disable or enable the target-side zero-copy option.
+  Default: false.
+- scheduler_settings - str. Select SPDK Target thread scheduler (static/dynamic).
+  Default: static.
+- num_shared_buffers - int, number of shared buffers to allocate when
+  creating the transport layer. Default: 4096.
+- dif_insert_strip - bool. Only for TCP transport. Enable DIF option when
+  creating the transport layer. Default: false.
+- null_block_dif_type - int, 0-3. Level of DIF type to use when creating
+  the null block bdev. Default: 0.
+- enable_dpdk_memory - [bool, int]. Wait for a given number of seconds and
+  call the env_dpdk_get_mem_stats RPC to dump DPDK memory stats. Typically
+  the wait time should be at least the fio ramp_time described in another section.

-### adq_enable

+### Initiator system settings section

-Configure and use ADQ on selected system. Only available when using Intel E810 NICs.
-Set to "true" to enable.

+There can be one or more `initiatorX` setting sections, depending on the test setup.

-## Initiator

+``` ~sh
+"initiator1": {
+    "ip": "10.0.0.1",
+    "nic_ips": ["192.0.1.2"],
+    "remote_nic_ips": ["192.0.1.1"],
+    "mode": "spdk",
+    "fio_bin": "/path/to/fio/bin",
+    "nvmecli_bin": "/path/to/nvmecli/bin",
+    "cpus_allowed": "0,1,10-15",
+    "cpus_allowed_policy": "shared",
+    "num_cores": 4,
+    "cpu_frequency": 2100000,
+    "adq_enable": false
+}
+```

-Describes initiator arguments. There can be more than one initiator section in the configuration file.
-For the sake of easier results parsing from multiple initiators please use only digits and letters
-in initiator section name.

+Required:

-### ip

+- ip - management IP address of the initiator system, used to set up the SSH connection.
+- nic_ips - list of IP addresses of NIC ports to be used in the test,
+  local to the given initiator system.
+- remote_nic_ips - list of IP addresses of Target NIC ports to which the initiator
+  will attempt to connect.
+- mode - initiator mode, "spdk" or "kernel". For SPDK, the bdev fio plugin
+  will be used to connect to NVMe-oF subsystems and submit I/O. For "kernel",
+  nvmecli will be used to connect to NVMe-oF subsystems and fio will use the
+  libaio ioengine to submit I/Os.

-Management IP address used for SSH communication with initiator server.

+Optional, common:

-### nic_ips

+- nvmecli_bin - path to the nvmecli binary; will be used for the "discovery" command
+  (for both SPDK and Kernel modes) and for "connect" (in case of Kernel mode).
+  Default: system-wide "nvme".
+- fio_bin - path to a custom fio binary, which will be used to run IO.
+  Additionally, the directory where the binary is located should also contain
+  the fio sources needed to build the SPDK fio_plugin for spdk initiator mode.
+  Default: /usr/src/fio/fio.
+- cpus_allowed - str, list of CPU cores to run fio threads on. Takes precedence
+  over the `num_cores` setting. Default: None (CPU cores randomly allocated).
+  For more information see `man fio`.
+- cpus_allowed_policy - str, "shared" or "split". CPU sharing policy for fio
+  threads. Default: shared. For more information see `man fio`.
+- num_cores - By default fio threads on the initiator side will use as many CPU cores
+  as there are connected subsystems. This option limits the number of CPU cores
+  used for fio threads to this number; cores are allocated randomly and fio
+  `filename` parameters are grouped if needed. The `cpus_allowed` option takes
+  precedence and `num_cores` is ignored if both are present in the config.
+- cpu_frequency - int, custom CPU frequency to set. By default test setups are
+  configured to run in performance mode at max frequencies. This option allows
+  the user to select a CPU frequency instead of running at max frequency. Before
+  using this option, `intel_pstate=disable` must be set in the boot options and
+  the cpupower governor must be set to `userspace`.
+- tuned_profile - tunedadm profile to apply on the system before starting
+  the test.
+- adq_enable - bool; only for TCP transport. Configure system modules, NIC
+  settings and create priority traffic classes for ADQ testing.
+  You need an ADQ-capable NIC like the Intel E810.
+- irq_scripts_dir - path to the scripts directory of the Mellanox mlnx-tools package;
+  used to run the set_irq_affinity.sh script.
+  Default: /usr/src/local/mlnx-tools/ofed_scripts

-List of IP addresses local to initiator.

+### Fio settings section

-### remote_nic_ips

+``` ~sh
+"fio": {
+    "bs": ["4k", "128k"],
+    "qd": [32, 128],
+    "rw": ["randwrite", "write"],
+    "rwmixread": 100,
+    "num_jobs": 2,
+    "run_time": 30,
+    "ramp_time": 30,
+    "run_num": 3
+}
+```

-List of target IP addresses to which the initiator should try to connect.

+Required:

-### mode

+- bs - fio IO block size
+- qd - fio iodepth
+- rw - fio rw mode
+- rwmixread - read operations percentage in case of mixed workloads
+- num_jobs - fio numjobs parameter.
+  Note: may affect the total number of CPU cores used by initiator systems
+- run_time - fio run time
+- ramp_time - fio ramp time; no measurements are taken during this time
+- run_num - number of times each workload combination is run.
+  If more than 1, the final result is the average of all runs.

-"spdk" or "kernel" values allowed.

+#### Test Combinations

-### cpus_allowed

+It is possible to specify more than one value for the bs, qd and rw parameters.
+In such a case the script creates a list of their combinations and runs IO tests
+for all of these combinations.
+For example, the following configuration:

-List of CPU cores to assign for running SPDK NVMe-OF initiator process.
-Can specify exact core numbers: 0,5
-or ranges: 10-15
-or binding to CPUs 0, 5, and 8 to 15: `cpus_allowed=0,5,8-15`.
-If not specified then will use num_cores option.
-If specified with num_cores then cpu_allowed parameter has higher priority than num_cores.

+``` ~sh
+  "bs": ["4k"],
+  "qd": [32, 128],
+  "rw": ["write", "read"]
+```

-### num_cores

+results in the following workloads being tested:

-Applies only to SPDK initiator. Number of CPUs core to use for running FIO job.
-If not specified then by default each connected subsystem gets its own CPU core.

+- 4k-write-32
+- 4k-write-128
+- 4k-read-32
+- 4k-read-128

-### nvmecli_dir

+#### Important note about queue depth parameter

-Path to directory with nvme-cli application. If not provided then system-wide package will be used
-by default. Not used if "mode" is set to "spdk".

+qd in the fio settings section refers to the iodepth generated per single fio target
+device ("filename" in the resulting fio configuration file). It is re-calculated
+while the script is running, so the generated fio configuration file might contain
+a different value than the one specified by the user, especially when also
+using the "numjobs" or initiator "num_cores" parameters. For example:

-### fio_bin

+The Target system exposes 4 NVMe-oF subsystems. One initiator system connects to
+all of these subsystems.

-Path to the fio binary that will be used to compile SPDK and run the test.
-If not specified, then the script will use /usr/src/fio/fio as the default.

+Initiator configuration (relevant settings only):

-### adq_enable

+``` ~sh
+"initiator1": {
+  "num_cores": 1
+}
+```

-Configure and use ADQ on selected system. Only available when using Intel E810 NICs.
-Set to "true" to enable.

+Fio configuration:

-### extra_params

+``` ~sh
+"fio": {
+  "bs": ["4k"],
+  "qd": [128],
+  "rw": ["randread"],
+  "rwmixread": 100,
+  "num_jobs": 1,
+  "run_time": 30,
+  "ramp_time": 30,
+  "run_num": 1
+}
+```

-Space separated string with additional settings for "nvme connect" command
-other than -t, -s, -n and -a.

+In this case the generated fio configuration will look like this
+(relevant settings only):

-## fio

+``` ~sh
+[global]
+numjobs=1

-Fio job parameters.

+[job_section0]
+filename=Nvme0n1
+filename=Nvme1n1
+filename=Nvme2n1
+filename=Nvme3n1
+iodepth=512
+```

-- bs: block size
-- qd: io depth - Per connected fio filename target
-- rw: workload mode
-- rwmixread: percentage of reads in readwrite workloads
-- run_time: time (in seconds) to run workload
-- ramp_time: time (in seconds) to run workload before statistics are gathered
-- run_num: how many times to run given workload in loop

+The `num_cores` option results in the 4 connected subsystems being grouped under a
+single fio thread (job_section0). Because `iodepth` is local to `job_section0`,
+it is distributed between the `filename` entries in that job section in a round-robin
+fashion (by default). In case of fio targets with the same characteristics
+(IOPS & bandwidth capabilities) this means that the iodepth is distributed **roughly**
+equally. Ultimately the above fio configuration results in iodepth=128 per filename.

-# Running Test

+`numjobs` higher than 1 is also taken into account, so that the desired qd per
+filename is retained:
+
+``` ~sh
+[global]
+numjobs=2
+
+[job_section0]
+filename=Nvme0n1
+filename=Nvme1n1
+filename=Nvme2n1
+filename=Nvme3n1
+iodepth=256
+```
+
+Besides `run_num`, more information on these options can be found in `man fio`.
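+
+As a quick sanity check of the iodepth redistribution described above, the value
+written into a generated job section can be recomputed by hand; the sketch below
+is illustrative arithmetic only (the variable names are made up) and is not part
+of the test scripts:
+
+``` ~sh
+QD=128        # "qd" requested per filename
+FILENAMES=4   # subsystems grouped under one job section
+NUMJOBS=2     # fio "num_jobs" setting
+# iodepth placed in each generated job section
+echo $(( QD * FILENAMES / NUMJOBS ))   # prints 256
+```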
+
+## Running the test
+
+Before running the test script, run the spdk/scripts/setup.sh script on the Target
+system. This binds the devices to the VFIO/UIO userspace driver and allocates
+hugepages for the SPDK process.

-Before running the test script use the setup.sh script to bind the devices you want to
-use in the test to the VFIO/UIO driver.
 Run the script on the NVMe-oF target system:
-    cd spdk
-    sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py
-The script uses the config.json configuration file in the scripts/perf/nvmf directory by default. You can
-specify a different configuration file at runtime as shown below:
-sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py /path/to/config file/json config file
-
-The script uses another spdk script (scripts/rpc.py) so we pass the path to rpc.py by setting the Python path
-as a runtime environment parameter.
-
-# Test Results
-
-When the test completes, you will find a csv file (nvmf_results.csv) containing the results in the target node
-directory /tmp/results.
-
-# Processor Counter Monitor (PCM)
-PCM Tools provides a number of command-line utilities for real-time monitoring.
-Before using PCM Tools in nvmf perf scripts it needs to be installed on Target machine.
-PCM source and instructions are available on https://github.com/opcm/pcm.
-To enable PCM in perf test you need to add Target setting in config.json file:
-```
-"pcm_settings": ["pcm_directory", delay_time, measure_interval, sample_count]
-```
-example:
-```
-"pcm_settings": ["/tmp/pcm", 10, 1, 30]
-```
-Example above will run PCM measure for cpu, memory and power. Start will be delayed by 10s,
-sample taken every 1 second. Last parameter is number of samples for cpu and power measure.
-PCM memory do not support sample count.
-
-# Bandwidth monitor (bwm-ng)
-PCM Tools provides a number of command-line utilities for real-time monitoring.
-Before using bwm-ng in nvmf perf scripts it needs to be installed on Target machine.
-To enable bandwidth monitor in perf test you need to add Target setting in config.json file:
-```
-"bandwidth_settings": [bool, sample_count]
-```
-example:
-```
-"bandwidth_settings": [true, 30]
-```
+``` ~sh
+cd spdk
+sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py
+```

-# Enable zcopy on target side:
-To enable zcopy in perf test you need to add Target setting in config.json file:
-```
-"zcopy_settings": bool
-```
-example:
-```
-"zcopy_settings": true
-```
+By default the script uses the config.json configuration file in the scripts/perf/nvmf
+directory. You can specify a different configuration file at runtime as shown below:
+
+``` ~sh
+sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py /path/to/config.json
+```

-# Scheduler settings in NVMe-oF performance scripts
-To enable dynamic scheduler in perf test you need to add Target setting in config.json file:
-```
-"scheduler_settings": [scheduler_name]
-```
-example:
-```
-"scheduler_settings": [static]
-```
+The PYTHONPATH environment variable is needed because the script uses SPDK-local Python
+modules. If you'd like to get rid of `PYTHONPATH=$PYTHONPATH:$PWD/scripts`,
+you need to modify your environment so that the Python interpreter is aware of
+the `spdk/scripts` directory.
+
+## Test Results
+
+Test results for all workload combinations are printed to screen once the tests
+are finished.
Additionally all aggregate results are saved to /tmp/results/nvmf_results.conf diff --git a/scripts/perf/nvmf/config.json b/scripts/perf/nvmf/config.json index d8b16be93..f846691a0 100644 --- a/scripts/perf/nvmf/config.json +++ b/scripts/perf/nvmf/config.json @@ -7,22 +7,24 @@ "target": { "nic_ips": ["192.0.1.1", "192.0.2.1"], "mode": "spdk", - "use_null_block": false, + "null_block_devices": 0, "nvmet_dir": "/path/to/nvmetcli", - "num_cores": "1", + "num_cores": "[1]", "num_shared_buffers": 4096 }, "initiator1": { "ip": "10.0.0.1", - "nic_ips": ["192.0.1.1"], + "nic_ips": ["192.0.1.2"], + "remote_nic_ips": ["192.0.1.1"], "mode": "spdk", - "nvmecli_dir": "/path/to/nvmecli", - "fio_dir": "/path/to/fio binary", - "extra_params": "Extra nvme connect params" + "fio_bin": "/path/to/fio/bin", + "nvmecli_bin": "/path/to/nvmecli/bin", + "num_cores": 4 }, "initiator2": { "ip": "10.0.0.2", - "nic_ips": ["192.0.2.1"], + "nic_ips": ["192.0.2.2"], + "remote_nic_ips": ["192.0.2.1"], "mode": "spdk" }, "fio": {