From 597688b2b1226500e38eba7799304cbbcb8793cc Mon Sep 17 00:00:00 2001 From: Konrad Sztyber Date: Thu, 10 Jun 2021 12:35:05 +0200 Subject: [PATCH] scripts/trace: use ijson to parse the traces Since the trace files can get very large (several GBs), parsing them using python's json module might require an unfeasible amount of memory, as it needs to load the whole file first. The ijson [1] library provides interfaces for parsing files iteratively, only loading a small portion of a file at a time. It requires the input JSON to have the tsc_rate and the definitions of the tracepoints listed before the tracepoint entries. It's not a big deal, as this is the way `spdk_trace` generates it, but it's worth noting, as passing that file through something like `jq -S` might make it unreadable to the trace script. [1] https://pypi.org/project/ijson Signed-off-by: Konrad Sztyber Change-Id: I03c0c3fb47091da615a3978b8d63edf4d876b811 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8275 Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris Reviewed-by: Tomasz Zawadzki Reviewed-by: Monica Kenguva --- scripts/bpf/trace.py | 46 +++++++++++++++++++++++++++++++--------- scripts/pkgdep/arch.sh | 1 + scripts/pkgdep/debian.sh | 1 + scripts/pkgdep/rhel.sh | 1 + 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/scripts/bpf/trace.py b/scripts/bpf/trace.py index e4b7985c6..5fa918694 100755 --- a/scripts/bpf/trace.py +++ b/scripts/bpf/trace.py @@ -4,7 +4,7 @@ from argparse import ArgumentParser from dataclasses import dataclass, field from itertools import islice from typing import Dict, List, TypeVar -import json +import ijson import os import re import subprocess @@ -168,21 +168,40 @@ class TraceEntry: class Trace: """Stores, parses, and prints out SPDK traces""" def __init__(self, file): - self._json = json.load(file) + self._parser = ijson.parse(file) self._objects = [] self._argfmt = {TracepointArgument.TYPE_PTR: lambda a: f'0x{a:x}'} - self.tpoints = {t.id: t for t in self._parse_tpoints()} - self.tsc_rate = self._json['tsc_rate'] + self.tpoints = {} + self._parse_defs() - def _parse_tpoints(self): - for tpoint in self._json.get('tpoints', []): - yield Tracepoint( - name=tpoint['name'], id=tpoint['id'], + def _parse_tpoints(self, tpoints): + for tpoint in tpoints: + tpoint_id = tpoint['id'] + self.tpoints[tpoint_id] = Tracepoint( + name=tpoint['name'], id=tpoint_id, new_object=tpoint['new_object'], args=[TracepointArgument(name=a['name'], argtype=a['type']) for a in tpoint.get('args', [])]) + def _parse_defs(self): + builder = None + for prefix, event, value in self._parser: + # If we reach entries array, there are no more tracepoint definitions + if prefix == 'entries': + break + elif prefix == 'tsc_rate': + self.tsc_rate = value + continue + + if (prefix, event) == ('tpoints', 'start_array'): + builder = ijson.ObjectBuilder() + if builder is not None: + builder.event(event, value) + if (prefix, event) == ('tpoints', 'end_array'): + self._parse_tpoints(builder.value) + builder = None + def _parse_entry(self, entry): tpoint = self.tpoints[entry['tpoint']] obj = entry.get('object', {}) @@ -193,8 +212,15 @@ class Trace: args={n.name: v for n, v in zip(tpoint.args, entry.get('args', []))}) def _entries(self): - for entry in self._json.get('entries', []): - yield self._parse_entry(entry) + builder = None + for prefix, event, value in self._parser: + if (prefix, event) == ('entries.item', 'start_map'): + builder = ijson.ObjectBuilder() + if builder is not None: + builder.event(event, value) + if (prefix, event) == ('entries.item', 'end_map'): + yield self._parse_entry(builder.value) + builder = None def _annotate_args(self, entry): annotations = {} diff --git a/scripts/pkgdep/arch.sh b/scripts/pkgdep/arch.sh index 853c83bd2..5dcbdc30a 100755 --- a/scripts/pkgdep/arch.sh +++ b/scripts/pkgdep/arch.sh @@ -7,6 +7,7 @@ pacman -Sy --needed --noconfirm gcc make cmake cunit libaio openssl \ pacman -Sy --needed --noconfirm python-pexpect python-pip libffi pip install configshell_fb pip install pyelftools +pip install ijson # Additional dependencies for DPDK pacman -Sy --needed --noconfirm numactl nasm # Additional dependencies for ISA-L used in compression diff --git a/scripts/pkgdep/debian.sh b/scripts/pkgdep/debian.sh index 40149af15..9c07ca688 100755 --- a/scripts/pkgdep/debian.sh +++ b/scripts/pkgdep/debian.sh @@ -13,6 +13,7 @@ if ! pip3 install meson; then apt-get install -y meson fi pip3 install pyelftools +pip3 install ijson # Additional dependencies for SPDK CLI - not available on older Ubuntus apt-get install -y python3-configshell-fb python3-pexpect || echo \ "Note: Some SPDK CLI dependencies could not be installed." diff --git a/scripts/pkgdep/rhel.sh b/scripts/pkgdep/rhel.sh index 00c82c989..19abdf03f 100755 --- a/scripts/pkgdep/rhel.sh +++ b/scripts/pkgdep/rhel.sh @@ -95,6 +95,7 @@ yum install -y python3-pip pip3 install ninja pip3 install meson pip3 install pyelftools +pip3 install ijson # Additional dependencies for SPDK CLI - not available in rhel and centos if ! echo "$ID $VERSION_ID" | grep -E -q 'rhel 7|centos 7'; then