scripts/nvmf_perf: add irq_settings option

Allow the user to specify how IRQ alignment should be done
for the network interfaces used in the test.

Change-Id: Ib835d2ac2bc0c7b79474e617de32b96a483e436b
Signed-off-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15737
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>

@@ -95,7 +95,12 @@ Optional:
"zcopy_settings": false,
"dif_insert_strip": true,
"null_block_dif_type": 3,
"pm_settings": [true, 30, 1, 60]
"pm_settings": [true, 30, 1, 60],
"irq_settings": {
"mode": "cpulist",
"cpulist": "[0-10]",
"exclude_cpulist": false
}
}
```
@@ -131,6 +136,18 @@ Optional, common:
- enable_pm - bool;
if bool is set to true, power measurement is enabled via collect-bmc-pm on
the target side. Default: true.
- irq_settings - dict;
Choose how to adjust network interface IRQ settings.
mode: default - run IRQ alignment script with no additional options.
mode: bynode - align IRQs to be processed only on CPU cores matching the
NIC's NUMA node.
mode: cpulist - align IRQs to be processed only on CPU cores provided
in the cpulist parameter.
cpulist: list of CPU cores to use for cpulist mode. Can be provided as a
list of individual cores ("[0,1,10]"), core ranges ("[0-10]"), or a mix
of both ("[0-1,10,20-22]").
exclude_cpulist: reverse the effect of cpulist mode. Allow IRQ processing
only on CPU cores which are not provided in the cpulist parameter, as
shown in the example below.
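For example, a hypothetical configuration which keeps IRQ processing off
CPU cores 0-3 and allows it only on the remaining cores might look like:

```
"irq_settings": {
    "mode": "cpulist",
    "cpulist": "[0-3]",
    "exclude_cpulist": true
}
```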
Optional, Kernel Target only:
@@ -163,6 +180,15 @@ Optional, SPDK Target only:
Accelerator (DSA) engine.
- scheduler_core_limit - int, 0-100. Dynamic scheduler option to set the load
  limit at which a core is considered full.
- irq_settings - dict;
Choose how to adjust network interface IRQ settings.
Same as in the common options section, but the SPDK Target allows more modes:
mode: shared - align IRQs to be processed only on the same CPU cores which
are already used by the SPDK Target process.
mode: split - align IRQs to be processed only on CPU cores which are not
used by the SPDK Target process.
mode: split-bynode - same as "split", but reduce the number of CPU cores
used for IRQ processing to only those matching the NIC's NUMA node, as in
the example below.
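For example, to restrict IRQ processing to the NUMA-local CPU cores not used
by the SPDK Target process, the option might look like:

```
"irq_settings": { "mode": "split-bynode" }
```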
### Initiator system settings section
@@ -181,7 +207,8 @@ There can be one or more `initiatorX` setting sections, depending on the test se
"num_cores": 4,
"cpu_frequency": 2100000,
"adq_enable": false,
"kernel_engine": "io_uring"
"kernel_engine": "io_uring",
"irq_settings": { "mode": "bynode" }
}
```
@@ -231,6 +258,8 @@ Optional, common:
Available options:
- libaio (default)
- io_uring
- irq_settings - dict;
Same as "irq_settings" in Target common options section.
Optional, SPDK Initiator only:

@@ -54,10 +54,13 @@ class Server:
self.enable_adq = False
self.adq_priority = None
self.irq_settings = {"mode": "default"}
if "adq_enable" in server_config and server_config["adq_enable"]:
self.enable_adq = server_config["adq_enable"]
self.adq_priority = 1
if "irq_settings" in server_config:
self.irq_settings.update(server_config["irq_settings"])
if "tuned_profile" in server_config:
self.tuned_profile = server_config["tuned_profile"]
@@ -121,7 +124,7 @@ class Server:
self.configure_sysctl()
self.configure_tuned()
self.configure_cpu_governor()
self.configure_irq_affinity()
self.configure_irq_affinity(**self.irq_settings)
def load_drivers(self):
self.log.info("Loading drivers")
@@ -313,13 +316,79 @@ class Server:
self.governor_restore = self.exec_cmd(["cat", "/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"]).strip()
self.exec_cmd(["sudo", "cpupower", "frequency-set", "-g", "performance"])
def configure_irq_affinity(self):
self.log.info("Setting NIC irq affinity for NICs...")
def get_core_list_from_mask(self, core_mask):
# Generate list of individual cores from hex core mask
# (e.g. '0xffff') or list containing:
# - individual cores (e.g. '1, 2, 3')
# - core ranges (e.g. '0-3')
# - mix of both (e.g. '0, 1-4, 9, 11-13')
core_list = []
if "0x" in core_mask:
core_mask_int = int(core_mask, 16)
for i in range(core_mask_int.bit_length()):
if (1 << i) & core_mask_int:
core_list.append(i)
return core_list
else:
# Core list can be provided in .json config with square brackets
# remove them first
core_mask = core_mask.replace("[", "")
core_mask = core_mask.replace("]", "")
for i in core_mask.split(","):
if "-" in i:
start, end = i.split("-")
core_range = range(int(start), int(end) + 1)
core_list.extend(core_range)
else:
core_list.append(int(i))
return core_list
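# Illustrative examples (hypothetical inputs) of the parsing above:
#   get_core_list_from_mask("0x0F")      -> [0, 1, 2, 3]
#   get_core_list_from_mask("[0-1,10]")  -> [0, 1, 10]
#   get_core_list_from_mask("[2,4,6]")   -> [2, 4, 6]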
def configure_irq_affinity(self, mode="default", cpulist=None, exclude_cpulist=False):
self.log.info("Setting NIC irq affinity for NICs. Using %s mode" % mode)
if mode not in ["default", "bynode", "cpulist"]:
raise ValueError("%s irq affinity setting not supported" % mode)
if mode == "cpulist" and not cpulist:
raise ValueError("%s irq affinity setting set, but no cpulist provided" % mode)
affinity_script = "set_irq_affinity.sh"
if "default" not in mode:
affinity_script = "set_irq_affinity_cpulist.sh"
system_cpu_map = self.get_numa_cpu_map()
irq_script_path = os.path.join(self.irq_scripts_dir, affinity_script)
def cpu_list_to_string(cpulist):
return ",".join(map(lambda x: str(x), cpulist))
irq_script_path = os.path.join(self.irq_scripts_dir, "set_irq_affinity.sh")
nic_names = [self.get_nic_name_by_ip(n) for n in self.nic_ips]
for nic in nic_names:
irq_cmd = ["sudo", irq_script_path, nic]
for nic_name in nic_names:
irq_cmd = ["sudo", irq_script_path]
# Use only CPU cores matching NIC NUMA node.
# Remove any CPU cores if they're on exclusion list.
if mode == "bynode":
irq_cpus = system_cpu_map[self.get_nic_numa_node(nic_name)]
if cpulist and exclude_cpulist:
disallowed_cpus = self.get_core_list_from_mask(cpulist)
irq_cpus = list(set(irq_cpus) - set(disallowed_cpus))
if not irq_cpus:
raise Exception("No CPUs left to process IRQs after excluding CPUs!")
irq_cmd.append(cpu_list_to_string(irq_cpus))
if mode == "cpulist":
irq_cpus = self.get_core_list_from_mask(cpulist)
if exclude_cpulist:
# Flatten system CPU list, we don't need NUMA awareness here
system_cpu_list = sorted({x for v in system_cpu_map.values() for x in v})
irq_cpus = list(set(system_cpu_list) - set(irq_cpus))
if not irq_cpus:
raise Exception("No CPUs left to process IRQs after excluding CPUs!")
irq_cmd.append(cpu_list_to_string(irq_cpus))
irq_cmd.append(nic_name)
self.log.info(irq_cmd)
self.exec_cmd(irq_cmd, change_dir=self.irq_scripts_dir)
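# Sketch of the resulting command, assuming a hypothetical NIC "eth0" and
# irq_settings {"mode": "cpulist", "cpulist": "[0-3]"}:
#   sudo <irq_scripts_dir>/set_irq_affinity_cpulist.sh 0,1,2,3 eth0
# In "bynode" mode the NUMA-local core list of each NIC is passed instead.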
@@ -977,11 +1046,11 @@ class KernelTarget(Target):
class SPDKTarget(Target):
def __init__(self, name, general_config, target_config):
super().__init__(name, general_config, target_config)
# Required fields
self.core_mask = target_config["core_mask"]
self.num_cores = self.get_num_cores(self.core_mask)
self.num_cores = len(self.get_core_list_from_mask(self.core_mask))
super().__init__(name, general_config, target_config)
# Defaults
self.dif_insert_strip = False
@@ -1011,6 +1080,27 @@ class SPDKTarget(Target):
self.log.info("====DSA settings:====")
self.log.info("DSA enabled: %s" % (self.enable_dsa))
def configure_irq_affinity(self, mode="default", cpulist=None, exclude_cpulist=False):
if mode not in ["default", "bynode", "cpulist",
"shared", "split", "split-bynode"]:
self.log.error("%s irq affinity setting not supported" % mode)
raise Exception
# Create core list from SPDK's mask and change it to string.
# This is the type configure_irq_affinity expects for cpulist parameter.
spdk_tgt_core_list = self.get_core_list_from_mask(self.core_mask)
spdk_tgt_core_list = ",".join(map(lambda x: str(x), spdk_tgt_core_list))
spdk_tgt_core_list = "[" + spdk_tgt_core_list + "]"
if mode == "shared":
super().configure_irq_affinity(mode="cpulist", cpulist=spdk_tgt_core_list)
elif mode == "split":
super().configure_irq_affinity(mode="cpulist", cpulist=spdk_tgt_core_list, exclude_cpulist=True)
elif mode == "split-bynode":
super().configure_irq_affinity(mode="bynode", cpulist=spdk_tgt_core_list, exclude_cpulist=True)
else:
super().configure_irq_affinity(mode=mode, cpulist=cpulist, exclude_cpulist=exclude_cpulist)
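# How the SPDK-specific modes map onto the base method, assuming a
# hypothetical core_mask of "0xF0" (SPDK Target runs on cores 4-7):
#   "shared"       -> mode="cpulist", cpulist="[4,5,6,7]"
#   "split"        -> mode="cpulist", cpulist="[4,5,6,7]", exclude_cpulist=True
#   "split-bynode" -> mode="bynode",  cpulist="[4,5,6,7]", exclude_cpulist=True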
def adq_set_busy_read(self, busy_read_val):
return {"net.core.busy_read": busy_read_val}
@@ -1030,22 +1120,6 @@ class SPDKTarget(Target):
bdev_bdfs.append(bdev_traddr)
return bdev_bdfs
@staticmethod
def get_num_cores(core_mask):
if "0x" in core_mask:
return bin(int(core_mask, 16)).count("1")
else:
num_cores = 0
core_mask = core_mask.replace("[", "")
core_mask = core_mask.replace("]", "")
for i in core_mask.split(","):
if "-" in i:
x, y = i.split("-")
num_cores += len(range(int(x), int(y))) + 1
else:
num_cores += 1
return num_cores
def spdk_tgt_configure(self):
self.log.info("Configuring SPDK NVMeOF target via RPC")