feat: perf-pyrecord and perf-pyscript to inject python stacktraces

2020-12-16 13:14:04 +01:00 · 2020-12-16 13:14:04 +01:00 · 93d9e8251f
parent ede531f7d7
commit 93d9e8251f
8 changed files with 438 additions and 88 deletions
--- a/per4m/main.py
+++ b/per4m/main.py
@ -7,8 +7,10 @@ optional arguments:
  -h, --help          show this help message and exit
 positional arguments:
    perf2trace          convert perf.data to TraceEvent json data
    giltracer           Run VizTracer and perf, and merge the result to see where the GIL is active.
    record              Run VizTracer and perf simultaneously. See also man perf record.
    script              Take stacktraces from VizTracer, and inject them in perf script output.
    perf2trace          Convert perf.data to TraceEvent JSON data.
 Examples:
 $ perf script --no-inline | per4m -v
@ -26,6 +28,12 @@ def main(args=sys.argv):
    elif len(args) > 1 and args[1] == "giltracer":
        from .giltracer import main
        main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
    elif len(args) > 1 and args[1] == "record":
        from .record import main
        main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
    elif len(args) > 1 and args[1] == "script":
        from .script import main
        main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
    else:
        print(usage)
        sys.exit(0)
--- a/per4m/example4.py
+++ b/per4m/example4.py
@ -0,0 +1,15 @@
 import time
 values = [('foo', 3), ('bar', 42), ('bazz', 2)]
 def key_function(item):
    time.sleep(0.01)
    name, index = item
    return index
 def do_sort():
    values.sort(key=key_function)
 do_sort()
 def do_print():
    print(values)
 do_print()
--- a/per4m/giltracer.py
+++ b/per4m/giltracer.py
@ -1,89 +1,32 @@
 import argparse
 import os
 import shlex
 import subprocess
 import sys
 import time
 import signal
 import viztracer
 from viztracer.report_builder import ReportBuilder
 import runpy
-
+from .record import PerfRecord
 RETRIES = 10
 usage = """
-Convert perf.data to Trace Event Format.
+Run VizTracer and perf simultaneously, then inject the GIL information in the viztracer output
 Usage:
-$ per4m giltracer -m per4m.example1
+$ giltracer -m per4m.example1
 """
-class GilTracer:
+class GilTracer(PerfRecord):
-    def __init__(self, perf_output='perf.data', trace_output='giltracer.json', verbose=1):
+    def __init__(self, output='perf.data', trace_output='giltracer.json', verbose=1):
-        self.perf_output = perf_output
+        super().__init__(output=output, verbose=verbose, args=["-e 'sched:*'"])
        self.trace_output = trace_output
        self.verbose = verbose
    def __enter__(self):
        pid = os.getpid()
        cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.perf_output}"
        if self.verbose >= 2:
            print(f"Running: {cmd}")
        args = shlex.split(cmd)
        self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        start_time = time.time()
        for _ in range(RETRIES):
            if os.path.exists(self.perf_output):
                mtime = os.path.getmtime(self.perf_output)
                if mtime > start_time:
                    break
            # we need to wait till perf creates the file
            time.sleep(0.1)
        else:
            self.finish()
            raise OSError(f'perf did not create {self.perf_output}')
        start_size = os.path.getsize(self.perf_output)
        for _ in range(RETRIES):
            size = os.path.getsize(self.perf_output)
            if size > start_size:
                break
            # we need to wait till perf writes
            time.sleep(0.1)
        else:
            self.finish()
            raise OSError(f'perf did not write to {self.perf_output}')
        # and give perf a bit more time
        time.sleep(0.05)
        return self
    def finish(self):
        self.perf.terminate()
        outs, errs = self.perf.communicate(timeout=5)
        if self.verbose >= 1:
            print(outs.decode('utf8'))
        if errs:
            if self.verbose >= 1:
                print(errs.decode('utf8'))
        if self.perf.returncode not in [0, -signal.SIGTERM.value]:
            print(signal.SIGTERM)
            raise OSError(f'perf record fails, got exit code {self.perf.returncode}')
    def __exit__(self, *args):
-        self.finish()
+        super().__exit__(self, *args)
        if self.verbose >= 1:
            print('Wait for perf to finish...')
        self.perf.wait()
        pid = os.getpid()
        verbose = '-q ' + '-v ' * self.verbose
-        cmd = f"perf script -i {self.perf_output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}"
+        cmd = f"perf script -i {self.output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}"
        if os.system(cmd) != 0:
            raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}')
--- a/per4m/perf2trace.py
+++ b/per4m/perf2trace.py
@ -1,25 +1,7 @@
 import sys
 import json
 import argparse
-
+from .perfutils import read_events
 def read_events(input):
    first_line = True
    stacktrace = []
    header = None
    for line in sys.stdin:
        line = line.strip()
        if first_line:
            header = line
            stacktrace = []
            first_line = False
        else:
            if not line:  # done
                yield header, stacktrace
                first_line = True
                continue
            stacktrace.append(line)
 def parse_values(parts, **types):
    values = {}
@ -48,7 +30,7 @@ def drops_gil(stacktrace):
 usage = """
-Convert perf.data to Trace Event Format.
+Convert perf.data to TraceEvent JSON data.
 Usage:
--- a/per4m/perfutils.py
+++ b/per4m/perfutils.py
@ -0,0 +1,34 @@
 def read_events(input):
    first_line = True
    stacktrace = []
    header = None
    for line in input:
        line = line.strip()
        if first_line:
            header = line
            stacktrace = []
            first_line = False
        else:
            if not line:  # done
                yield header, stacktrace
                first_line = True
                continue
            stacktrace.append(line)
    if stacktrace:
        yield header, stacktrace
 def parse_header(header):
    parts = header.split()
    event = parts[4][:-1] # strip off ':'
    if ":" in event:  # tracepoint
        dso, triggerpid, cpu, time, _, *other = parts
        time = float(time[:-1]) * 1e6
        values = dict(dso=dso, triggerpid=int(triggerpid), cpu=cpu, time=time)
        tracepoint = True
    else:  # counter etc
        dso, triggerpid, time, count, _, *other = parts
        time = float(time[:-1]) * 1e6
        values = dict(dso=dso, triggerpid=int(triggerpid), count=count, time=time)
        tracepoint = False
    return values, other, tracepoint
--- a/per4m/record.py
+++ b/per4m/record.py
@ -0,0 +1,162 @@
 import argparse
 import contextlib
 import os
 import runpy
 import shlex
 import subprocess
 import signal
 import sys
 import time
 import viztracer
 import numpy
 RETRIES = 10
 usage = """
 Run VizTracer and perf simultaneously. See also man perf record.
 Usage:
 $ perf-pyrecord -e cycles -m per4m.example2
 """
 class PerfRecord:
    def __init__(self, output='perf.data', args=[], verbose=1):
        self.output = output
        self.args = args
        self.verbose = verbose
    def __enter__(self):
        pid = os.getpid()
        # cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}"
        perf_args = ' '.join(self.args)
        cmd = f"perf record {perf_args} --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}"
        if self.verbose >= 2:
            print(f"Running: {cmd}")
        args = shlex.split(cmd)
        self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        start_time = time.time()
        for _ in range(RETRIES):
            if os.path.exists(self.output):
                mtime = os.path.getmtime(self.output)
                if mtime > start_time:
                    break
            # we need to wait till perf creates the file
            time.sleep(0.1)
        else:
            self.finish()
            raise OSError(f'perf did not create {self.output}')
        start_size = os.path.getsize(self.output)
        for _ in range(RETRIES):
            size = os.path.getsize(self.output)
            if size > start_size:
                break
            # we need to wait till perf writes
            time.sleep(0.1)
        else:
            self.finish()
            raise OSError(f'perf did not write to {self.output}')
        # and give perf a bit more time
        time.sleep(0.05)
        return self
    def finish(self):
        self.perf.terminate()
        outs, errs = self.perf.communicate(timeout=5)
        if self.verbose >= 1:
            print(outs.decode('utf8'))
        if errs:
            if self.verbose >= 1:
                print(errs.decode('utf8'))
        if self.perf.returncode not in [0, -signal.SIGTERM.value]:
            print(signal.SIGTERM)
            raise OSError(f'perf record fails, got exit code {self.perf.returncode}')
    def __exit__(self, *args):
        self.finish()
        if self.verbose >= 1:
            print('Wait for perf to finish...')
        self.perf.wait()
@contextlib.contextmanager
 def empty_context():
    yield
 def main(argv=sys.argv):
    parser = argparse.ArgumentParser(argv[0],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=usage)
    parser.add_argument('--module', '-m')
    parser.add_argument('--import', dest="import_", help="Comma seperated list of modules to import before tracing (cleans up tracing output)")
    parser.add_argument('--verbose', '-v', action='count', default=1)
    parser.add_argument('--quiet', '-q', action='count', default=0)
    parser.add_argument('--output', '-o', dest="output", help="Output filename (default %(default)s)", default='perf.data')
    parser.add_argument('--output-viztracer', help="Output filename for viztracer (default %(default)s)", default='viztracer.json')
    parser.add_argument('--freq', '-F', help="Profile frequency, passed down to perf record (see man perf record)")
    parser.add_argument('--event', '-e', dest='events', help="Select PMU event, passed down to perf record (see man perf record)", action='append', default=[])
    parser.add_argument('--tracer_entries', type=int, default=1000000, help="See viztracer --help")
    # # these gets passed to stacktraceinject
    # parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=True, action='store_true')
    # parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false')
    # parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true')
    # parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false')
    # parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true')
    # parser.add_argument('--no-pedantic', dest="pedantic", action='store_false')
    parser.add_argument('args', nargs=argparse.REMAINDER)
    args = parser.parse_args(argv[1:])
    verbose = args.verbose - args.quiet
    if args.import_:
        for module in args.import_.split(','):
            if verbose >= 2:
                print(f'importing {module}')
                __import__(module)
    viztracer_path = args.output_viztracer
    ctx = contextlib.redirect_stdout(None) if verbose == 0 else empty_context()
    perf_args = []
    if args.freq:
        perf_args.append(f' --freq={args.freq}')
    for event in args.events:
        perf_args.append(f' -e {event}')
    with ctx:
        with PerfRecord(verbose=verbose, args=perf_args) as perf:
            with viztracer.VizTracer(output_file=viztracer_path, verbose=verbose, tracer_entries=args.tracer_entries):
                if args.module:
                    runpy.run_module(args.module)
                else:
                    sys.argv = args.args
                    runpy.run_path(sys.argv[0])
    # forward = ""
    # if args.keep_cpython_evals:
    #     forward += ' --keep-cpython-evals'
    # else:
    #     forward += ' --no-keep-cpython-evals'
    # if args.allow_mismatch:
    #     forward += ' --allow-mismatch'
    # else:
    #     forward += ' --no-allow-mismatch'
    # if args.pedantic:
    #     forward += ' --pedantic'
    # else:
    #     forward += ' --no-pedantic'
    # if args.output:
    #     forward += ' --output=args.output'
    # cmd = f"perf script -i {perf.perf_output} --no-inline | per4m stacktraceinject {forward} --input {viztracer_path}"
    # if os.system(cmd) != 0:
    #     raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}')
 if __name__ == '__main__':
    main()
--- a/per4m/script.py
+++ b/per4m/script.py
@ -0,0 +1,201 @@
 import argparse
 import json
 import shlex
 import subprocess
 import sys
 from viztracer.prog_snapshot import ProgSnapshot
 from .perfutils import read_events, parse_header
 usage = """
 Take stacktraces from VizTracer, and inject them in perf script output
 Usage:
 $ perf-pyscript | per4m stacktraceinject --input viztracer.json
 """
 def print_stderr(*args):
    print(*args, file=sys.stderr)
 def stacktrace_inject(stacktrace, snapshot, pid, time, keep_cpython_evals=False, perror=print_stderr, allow_mismatch=False, pedantic=False):
    # we'll do in place modification
    stacktrace = stacktrace.copy()
    # First, we build op parts of the callstack from VizTracers
    # We can go into and out of Python/C several times
    pystacktraces = [[]]
    pystacktrace_part = pystacktraces[-1]
    snapshot.goto_tid(pid)
    snapshot.goto_timestamp(time)
    bottom_frame = frame = snapshot.curr_frame
    was_in_python = frame.node.is_python
    while frame:
        node = frame.node
        # output should become sth like: ffffffff9700008c entry_SYSCALL_64_after_hwframe+0x44 ([kernel.kallsyms])
        filename = node.filename
        if filename is None:
            location = ''
        else:
            if 'frozen' in filename:
                filename = '<frozen>'
            location = f"::{filename}:{node.lineno}"
        funcname = node.funcname or node.fullname
        if node.is_python:
            type = 'py'
        else:
            type = 'cext'
        # lineno = f':{node.lineno}' if node.lineno is not None else ''
        if node.is_python:
            if not was_in_python:
                # this is a jump back to into c, which we allow
                # pystacktraces.append([])
                # pystacktrace_part = pystacktraces[-1]
                pass
        else:
            if was_in_python:
                # this is a jump into Python
                pystacktraces.append([])
                pystacktrace_part = pystacktraces[-1]
        pystacktrace_part.append(f'000000000000000000000000 {type}::{funcname}{location} ([{filename}])')
        was_in_python = node.is_python
        frame = frame.parent
    # Next, we find where in the perf output, we were in the Python evaluate loop (multiple places possible)
    # A list of (index_min, index_max) where the Python stacktraces should be injected/replaced
    pyslices = []
    index_min = None
    index_max = None
    is_py = [False] * len(stacktrace)
    # The C Part of Python callstack should begin with a function with this name in it
    begin_functions = ['PyEval_Eval']
    # and all function call after that may be one that contains these names
    continue_functions = ['PyEval_Eval', 'PyFunction_FastCall', 'function_code_fastcall', 'method_call', 'call_function', 'PyCFunction_FastCall', 'PyObject_FastCall']
    # we could make the code simpler by flipping the stacktrace, so the bottom of the stack starts at index 0
    # (instead of reversed)
    for i, call in enumerate(stacktrace[::-1]):
        i = len(stacktrace) - i - 1
        # Or we found the beginning, or a continuation of the evaluation loop
        if (index_max is None and any(sig in call for sig in begin_functions)) or\
            (index_max is not None and any(sig in call for sig in continue_functions)):
            if index_max is None:
                index_max = i
            index_min = i
        else:
            if index_max is not None:
                pyslices.append((index_min, index_max))
                is_py[index_min:index_max] = [True] * (index_max - index_min)
                index_min = index_max = None
    if index_max is not None:
        pyslices.append((index_min, index_max))
    # Lets give some nice output in case it does not match up
    if len(pystacktraces) != len(pyslices):
        if not pedantic:
            # for these we know that we often get a mismatch, and they seem reasonable to ignore
            known_failures = ['pythread_wrapper', 'switch_fpu_return', 'do_fork', 'ret_from_fork', 'start_thread']
            for known_failure in known_failures:
                if any(known_failure in call for call in stacktrace) and not any('_Py' in call for call in stacktrace):
                    perror(f'Seen {known_failure} in stacktrace during, silently ignoring the stack mismatch, use --no-pedantic to not ignore this')
                    return stacktrace
        perror(f"OOOOPS, could not match stacktraces at time {time} for pid {pid}!")
        perror("Where VizTracer thinks we are:")
        perror(bottom_frame.node.fullname)
        try:
            bottom_frame.show(perror)
        except:
            perror('Oops, viztracer fails showing')
        # snap.where()
        perror("Callstack slices where we think we should inject the Python callstack:")
        for pyslice in pyslices:
            perror(pyslice)
        perror("Callstack from perf, where * indicates where we think the Python eval loop is:")
        for i, call in enumerate(stacktrace):
            perror(i, '*' if is_py[i] else ' ', call)
        perror("Python call stacks, as found from VizTracer:")
        for pystacktraces_part in pystacktraces:
            perror('-----')
            for i, call in enumerate(pystacktraces_part):
                perror(call)
        perror('-----')
        if not allow_mismatch:
            raise ValueError('Stack traces from VizTracer and perf could not be matched, run with --allow-mismatch and inspect output carefully')
    else:
        for pyslice, pystacktraces_part in zip(pyslices, reversed(pystacktraces)):
            index_min, index_max = pyslice
            if keep_cpython_evals:
                for i in range(index_min, index_max+1):
                    ptr, call = stacktrace[i].split(' ', 1)
                    stacktrace[i] = f'{ptr} cpyeval::{call}'
            else:
                # mark for delection, to avoid indices getting messed up
                stacktrace[index_min:index_max+1] = [None] * (index_max - index_min + 1)
            for pystack in reversed(pystacktraces_part):
                stacktrace.insert(index_min, pystack)
        stacktrace = [k for k in stacktrace if k is not None]
    return stacktrace
 def main(argv=sys.argv):
    parser = argparse.ArgumentParser(argv[0],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=usage)
    parser.add_argument('--verbose', '-v', action='count', default=0)  # by default quiet, since we write to stdout
    parser.add_argument('--quiet', '-q', action='count', default=0)
    parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=False, action='store_true')
    parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false')
    parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true')
    parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false')
    parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true')
    parser.add_argument('--no-pedantic', dest="pedantic", action='store_false')
    parser.add_argument('--input', '-i', help="VizTracer input (default %(default)s)", default="viztracer.json")
    parser.add_argument('--output', '-o', dest="output", default=None, help="Output filename (default %(default)s)")
    args = parser.parse_args(argv[1:])
    verbose = args.verbose - args.quiet
    perf_script_args = ['--no-inline']
    perf_script_args = ' '.join(perf_script_args)
    cmd = f"perf script {perf_script_args}"
    if verbose >= 2:
        print(f"Running: {cmd}")
    perf_args = shlex.split(cmd)
    perf = subprocess.Popen(perf_args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) #, stdin=subprocess.PIPE)
    if args.output is None:
        output = sys.stdout
    else:
        output = open(args.output, "w")
    if verbose >= 1:
        print_stderr("Loading snapshot")
    with open(args.input, "r") as f:
        json_data = f.read()
    snap = ProgSnapshot(json_data)
    # find all pids (or tids)
    pids = list(snap.func_trees)
    for pid in pids.copy():
        pids.extend(list(snap.func_trees[pid]))
    t0 = min(event['ts'] for event in json.loads(json_data)['traceEvents'])
    for header, stacktrace in read_events(perf.stdout):
        print(header, file=output)
        values, _, _ = parse_header(header)
        time = values['time'] - t0
        triggerpid = values['triggerpid']
        if triggerpid in pids:
            try:
                stacktrace = stacktrace_inject(stacktrace, snap, triggerpid, time, keep_cpython_evals=args.keep_cpython_evals, allow_mismatch=args.allow_mismatch, pedantic=args.pedantic)
                for call in stacktrace:
                    print(call, file=output)
            except:
                print(f"Error for event: {header}")
                raise
        print(file=output)
--- a/setup.py
+++ b/setup.py
@ -16,6 +16,11 @@ setup(
    use_scm_version=True,
    setup_requires=['setuptools_scm'],
    entry_points={
-        'console_scripts': ['per4m = per4m.__main__:main']
+        'console_scripts': [
            'per4m = per4m.__main__:main',
            'giltracer = per4m.giltracer:main',
            'perf-pyrecord = per4m.record:main',
            'perf-pyscript = per4m.script:main',
        ]
    }
 )