From 93d9e8251f81a2fec1d684a80f43b9f8262b3380 Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Wed, 16 Dec 2020 13:14:04 +0100 Subject: [PATCH] feat: perf-pyrecord and perf-pyscript to inject python stacktraces --- per4m/__main__.py | 10 ++- per4m/example4.py | 15 ++++ per4m/giltracer.py | 73 ++-------------- per4m/perf2trace.py | 22 +---- per4m/perfutils.py | 34 ++++++++ per4m/record.py | 162 +++++++++++++++++++++++++++++++++++ per4m/script.py | 201 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 9 +- 8 files changed, 438 insertions(+), 88 deletions(-) create mode 100644 per4m/example4.py create mode 100644 per4m/perfutils.py create mode 100644 per4m/record.py create mode 100644 per4m/script.py diff --git a/per4m/__main__.py b/per4m/__main__.py index 25f179e..7ea0d98 100644 --- a/per4m/__main__.py +++ b/per4m/__main__.py @@ -7,8 +7,10 @@ optional arguments: -h, --help show this help message and exit positional arguments: - perf2trace convert perf.data to TraceEvent json data giltracer Run VizTracer and perf, and merge the result to see where the GIL is active. + record Run VizTracer and perf simultaneously. See also man perf record. + script Take stacktraces from VizTracer, and inject them in perf script output. + perf2trace Convert perf.data to TraceEvent JSON data. Examples: $ perf script --no-inline | per4m -v @@ -26,6 +28,12 @@ def main(args=sys.argv): elif len(args) > 1 and args[1] == "giltracer": from .giltracer import main main([os.path.basename(args[0]) + " " + args[1]] + args[2:]) + elif len(args) > 1 and args[1] == "record": + from .record import main + main([os.path.basename(args[0]) + " " + args[1]] + args[2:]) + elif len(args) > 1 and args[1] == "script": + from .script import main + main([os.path.basename(args[0]) + " " + args[1]] + args[2:]) else: print(usage) sys.exit(0) diff --git a/per4m/example4.py b/per4m/example4.py new file mode 100644 index 0000000..6bca538 --- /dev/null +++ b/per4m/example4.py @@ -0,0 +1,15 @@ +import time + +values = [('foo', 3), ('bar', 42), ('bazz', 2)] + +def key_function(item): + time.sleep(0.01) + name, index = item + return index + +def do_sort(): + values.sort(key=key_function) +do_sort() +def do_print(): + print(values) +do_print() diff --git a/per4m/giltracer.py b/per4m/giltracer.py index 2589a40..16a5a8d 100644 --- a/per4m/giltracer.py +++ b/per4m/giltracer.py @@ -1,89 +1,32 @@ import argparse import os -import shlex -import subprocess import sys -import time -import signal - import viztracer from viztracer.report_builder import ReportBuilder import runpy - - -RETRIES = 10 +from .record import PerfRecord usage = """ -Convert perf.data to Trace Event Format. - +Run VizTracer and perf simultaneously, then inject the GIL information in the viztracer output Usage: -$ per4m giltracer -m per4m.example1 +$ giltracer -m per4m.example1 """ -class GilTracer: - def __init__(self, perf_output='perf.data', trace_output='giltracer.json', verbose=1): - self.perf_output = perf_output +class GilTracer(PerfRecord): + def __init__(self, output='perf.data', trace_output='giltracer.json', verbose=1): + super().__init__(output=output, verbose=verbose, args=["-e 'sched:*'"]) self.trace_output = trace_output self.verbose = verbose - def __enter__(self): - pid = os.getpid() - cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.perf_output}" - if self.verbose >= 2: - print(f"Running: {cmd}") - args = shlex.split(cmd) - self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - start_time = time.time() - for _ in range(RETRIES): - if os.path.exists(self.perf_output): - mtime = os.path.getmtime(self.perf_output) - if mtime > start_time: - break - # we need to wait till perf creates the file - time.sleep(0.1) - else: - self.finish() - raise OSError(f'perf did not create {self.perf_output}') - start_size = os.path.getsize(self.perf_output) - for _ in range(RETRIES): - size = os.path.getsize(self.perf_output) - if size > start_size: - break - # we need to wait till perf writes - time.sleep(0.1) - else: - self.finish() - raise OSError(f'perf did not write to {self.perf_output}') - # and give perf a bit more time - time.sleep(0.05) - return self - - def finish(self): - self.perf.terminate() - outs, errs = self.perf.communicate(timeout=5) - if self.verbose >= 1: - print(outs.decode('utf8')) - if errs: - if self.verbose >= 1: - print(errs.decode('utf8')) - if self.perf.returncode not in [0, -signal.SIGTERM.value]: - print(signal.SIGTERM) - raise OSError(f'perf record fails, got exit code {self.perf.returncode}') - def __exit__(self, *args): - self.finish() - if self.verbose >= 1: - print('Wait for perf to finish...') - self.perf.wait() - - pid = os.getpid() + super().__exit__(self, *args) verbose = '-q ' + '-v ' * self.verbose - cmd = f"perf script -i {self.perf_output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}" + cmd = f"perf script -i {self.output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}" if os.system(cmd) != 0: raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}') diff --git a/per4m/perf2trace.py b/per4m/perf2trace.py index b49c533..e3d2abb 100644 --- a/per4m/perf2trace.py +++ b/per4m/perf2trace.py @@ -1,25 +1,7 @@ import sys import json import argparse - - -def read_events(input): - first_line = True - stacktrace = [] - header = None - for line in sys.stdin: - line = line.strip() - if first_line: - header = line - stacktrace = [] - first_line = False - else: - if not line: # done - yield header, stacktrace - first_line = True - continue - stacktrace.append(line) - +from .perfutils import read_events def parse_values(parts, **types): values = {} @@ -48,7 +30,7 @@ def drops_gil(stacktrace): usage = """ -Convert perf.data to Trace Event Format. +Convert perf.data to TraceEvent JSON data. Usage: diff --git a/per4m/perfutils.py b/per4m/perfutils.py new file mode 100644 index 0000000..0fabe78 --- /dev/null +++ b/per4m/perfutils.py @@ -0,0 +1,34 @@ +def read_events(input): + first_line = True + stacktrace = [] + header = None + for line in input: + line = line.strip() + if first_line: + header = line + stacktrace = [] + first_line = False + else: + if not line: # done + yield header, stacktrace + first_line = True + continue + stacktrace.append(line) + if stacktrace: + yield header, stacktrace + + +def parse_header(header): + parts = header.split() + event = parts[4][:-1] # strip off ':' + if ":" in event: # tracepoint + dso, triggerpid, cpu, time, _, *other = parts + time = float(time[:-1]) * 1e6 + values = dict(dso=dso, triggerpid=int(triggerpid), cpu=cpu, time=time) + tracepoint = True + else: # counter etc + dso, triggerpid, time, count, _, *other = parts + time = float(time[:-1]) * 1e6 + values = dict(dso=dso, triggerpid=int(triggerpid), count=count, time=time) + tracepoint = False + return values, other, tracepoint diff --git a/per4m/record.py b/per4m/record.py new file mode 100644 index 0000000..c22da47 --- /dev/null +++ b/per4m/record.py @@ -0,0 +1,162 @@ +import argparse +import contextlib +import os +import runpy +import shlex +import subprocess +import signal +import sys +import time + +import viztracer +import numpy + + +RETRIES = 10 + + +usage = """ + +Run VizTracer and perf simultaneously. See also man perf record. + +Usage: + +$ perf-pyrecord -e cycles -m per4m.example2 +""" + +class PerfRecord: + def __init__(self, output='perf.data', args=[], verbose=1): + self.output = output + self.args = args + self.verbose = verbose + + def __enter__(self): + pid = os.getpid() + # cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}" + perf_args = ' '.join(self.args) + cmd = f"perf record {perf_args} --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}" + if self.verbose >= 2: + print(f"Running: {cmd}") + args = shlex.split(cmd) + self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + start_time = time.time() + for _ in range(RETRIES): + if os.path.exists(self.output): + mtime = os.path.getmtime(self.output) + if mtime > start_time: + break + # we need to wait till perf creates the file + time.sleep(0.1) + else: + self.finish() + raise OSError(f'perf did not create {self.output}') + start_size = os.path.getsize(self.output) + for _ in range(RETRIES): + size = os.path.getsize(self.output) + if size > start_size: + break + # we need to wait till perf writes + time.sleep(0.1) + else: + self.finish() + raise OSError(f'perf did not write to {self.output}') + # and give perf a bit more time + time.sleep(0.05) + return self + + def finish(self): + self.perf.terminate() + outs, errs = self.perf.communicate(timeout=5) + if self.verbose >= 1: + print(outs.decode('utf8')) + if errs: + if self.verbose >= 1: + print(errs.decode('utf8')) + if self.perf.returncode not in [0, -signal.SIGTERM.value]: + print(signal.SIGTERM) + raise OSError(f'perf record fails, got exit code {self.perf.returncode}') + + def __exit__(self, *args): + self.finish() + if self.verbose >= 1: + print('Wait for perf to finish...') + self.perf.wait() + + +@contextlib.contextmanager +def empty_context(): + yield + + +def main(argv=sys.argv): + parser = argparse.ArgumentParser(argv[0], + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=usage) + parser.add_argument('--module', '-m') + parser.add_argument('--import', dest="import_", help="Comma seperated list of modules to import before tracing (cleans up tracing output)") + parser.add_argument('--verbose', '-v', action='count', default=1) + parser.add_argument('--quiet', '-q', action='count', default=0) + parser.add_argument('--output', '-o', dest="output", help="Output filename (default %(default)s)", default='perf.data') + parser.add_argument('--output-viztracer', help="Output filename for viztracer (default %(default)s)", default='viztracer.json') + + parser.add_argument('--freq', '-F', help="Profile frequency, passed down to perf record (see man perf record)") + parser.add_argument('--event', '-e', dest='events', help="Select PMU event, passed down to perf record (see man perf record)", action='append', default=[]) + + parser.add_argument('--tracer_entries', type=int, default=1000000, help="See viztracer --help") + + # # these gets passed to stacktraceinject + # parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=True, action='store_true') + # parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false') + # parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true') + # parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false') + # parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true') + # parser.add_argument('--no-pedantic', dest="pedantic", action='store_false') + + parser.add_argument('args', nargs=argparse.REMAINDER) + args = parser.parse_args(argv[1:]) + verbose = args.verbose - args.quiet + + if args.import_: + for module in args.import_.split(','): + if verbose >= 2: + print(f'importing {module}') + __import__(module) + + viztracer_path = args.output_viztracer + ctx = contextlib.redirect_stdout(None) if verbose == 0 else empty_context() + perf_args = [] + if args.freq: + perf_args.append(f' --freq={args.freq}') + for event in args.events: + perf_args.append(f' -e {event}') + with ctx: + with PerfRecord(verbose=verbose, args=perf_args) as perf: + with viztracer.VizTracer(output_file=viztracer_path, verbose=verbose, tracer_entries=args.tracer_entries): + if args.module: + runpy.run_module(args.module) + else: + sys.argv = args.args + runpy.run_path(sys.argv[0]) + # forward = "" + # if args.keep_cpython_evals: + # forward += ' --keep-cpython-evals' + # else: + # forward += ' --no-keep-cpython-evals' + # if args.allow_mismatch: + # forward += ' --allow-mismatch' + # else: + # forward += ' --no-allow-mismatch' + # if args.pedantic: + # forward += ' --pedantic' + # else: + # forward += ' --no-pedantic' + # if args.output: + # forward += ' --output=args.output' + + # cmd = f"perf script -i {perf.perf_output} --no-inline | per4m stacktraceinject {forward} --input {viztracer_path}" + # if os.system(cmd) != 0: + # raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}') + + +if __name__ == '__main__': + main() diff --git a/per4m/script.py b/per4m/script.py new file mode 100644 index 0000000..d723b76 --- /dev/null +++ b/per4m/script.py @@ -0,0 +1,201 @@ +import argparse +import json +import shlex +import subprocess +import sys + +from viztracer.prog_snapshot import ProgSnapshot + + +from .perfutils import read_events, parse_header + +usage = """ + +Take stacktraces from VizTracer, and inject them in perf script output + +Usage: + +$ perf-pyscript | per4m stacktraceinject --input viztracer.json +""" + +def print_stderr(*args): + print(*args, file=sys.stderr) + +def stacktrace_inject(stacktrace, snapshot, pid, time, keep_cpython_evals=False, perror=print_stderr, allow_mismatch=False, pedantic=False): + # we'll do in place modification + stacktrace = stacktrace.copy() + # First, we build op parts of the callstack from VizTracers + # We can go into and out of Python/C several times + pystacktraces = [[]] + pystacktrace_part = pystacktraces[-1] + + snapshot.goto_tid(pid) + snapshot.goto_timestamp(time) + bottom_frame = frame = snapshot.curr_frame + was_in_python = frame.node.is_python + while frame: + node = frame.node + # output should become sth like: ffffffff9700008c entry_SYSCALL_64_after_hwframe+0x44 ([kernel.kallsyms]) + filename = node.filename + if filename is None: + location = '' + else: + if 'frozen' in filename: + filename = '' + location = f"::{filename}:{node.lineno}" + funcname = node.funcname or node.fullname + if node.is_python: + type = 'py' + else: + type = 'cext' + # lineno = f':{node.lineno}' if node.lineno is not None else '' + if node.is_python: + if not was_in_python: + # this is a jump back to into c, which we allow + # pystacktraces.append([]) + # pystacktrace_part = pystacktraces[-1] + pass + else: + if was_in_python: + # this is a jump into Python + pystacktraces.append([]) + pystacktrace_part = pystacktraces[-1] + pystacktrace_part.append(f'000000000000000000000000 {type}::{funcname}{location} ([{filename}])') + was_in_python = node.is_python + frame = frame.parent + + # Next, we find where in the perf output, we were in the Python evaluate loop (multiple places possible) + # A list of (index_min, index_max) where the Python stacktraces should be injected/replaced + pyslices = [] + + index_min = None + index_max = None + is_py = [False] * len(stacktrace) + # The C Part of Python callstack should begin with a function with this name in it + begin_functions = ['PyEval_Eval'] + # and all function call after that may be one that contains these names + continue_functions = ['PyEval_Eval', 'PyFunction_FastCall', 'function_code_fastcall', 'method_call', 'call_function', 'PyCFunction_FastCall', 'PyObject_FastCall'] + # we could make the code simpler by flipping the stacktrace, so the bottom of the stack starts at index 0 + # (instead of reversed) + for i, call in enumerate(stacktrace[::-1]): + i = len(stacktrace) - i - 1 + # Or we found the beginning, or a continuation of the evaluation loop + if (index_max is None and any(sig in call for sig in begin_functions)) or\ + (index_max is not None and any(sig in call for sig in continue_functions)): + if index_max is None: + index_max = i + index_min = i + else: + if index_max is not None: + pyslices.append((index_min, index_max)) + is_py[index_min:index_max] = [True] * (index_max - index_min) + index_min = index_max = None + if index_max is not None: + pyslices.append((index_min, index_max)) + + # Lets give some nice output in case it does not match up + if len(pystacktraces) != len(pyslices): + if not pedantic: + # for these we know that we often get a mismatch, and they seem reasonable to ignore + known_failures = ['pythread_wrapper', 'switch_fpu_return', 'do_fork', 'ret_from_fork', 'start_thread'] + for known_failure in known_failures: + if any(known_failure in call for call in stacktrace) and not any('_Py' in call for call in stacktrace): + perror(f'Seen {known_failure} in stacktrace during, silently ignoring the stack mismatch, use --no-pedantic to not ignore this') + return stacktrace + perror(f"OOOOPS, could not match stacktraces at time {time} for pid {pid}!") + perror("Where VizTracer thinks we are:") + perror(bottom_frame.node.fullname) + try: + bottom_frame.show(perror) + except: + perror('Oops, viztracer fails showing') + # snap.where() + perror("Callstack slices where we think we should inject the Python callstack:") + for pyslice in pyslices: + perror(pyslice) + perror("Callstack from perf, where * indicates where we think the Python eval loop is:") + for i, call in enumerate(stacktrace): + perror(i, '*' if is_py[i] else ' ', call) + + perror("Python call stacks, as found from VizTracer:") + for pystacktraces_part in pystacktraces: + perror('-----') + for i, call in enumerate(pystacktraces_part): + perror(call) + perror('-----') + if not allow_mismatch: + raise ValueError('Stack traces from VizTracer and perf could not be matched, run with --allow-mismatch and inspect output carefully') + else: + for pyslice, pystacktraces_part in zip(pyslices, reversed(pystacktraces)): + index_min, index_max = pyslice + if keep_cpython_evals: + for i in range(index_min, index_max+1): + ptr, call = stacktrace[i].split(' ', 1) + stacktrace[i] = f'{ptr} cpyeval::{call}' + else: + # mark for delection, to avoid indices getting messed up + stacktrace[index_min:index_max+1] = [None] * (index_max - index_min + 1) + for pystack in reversed(pystacktraces_part): + stacktrace.insert(index_min, pystack) + stacktrace = [k for k in stacktrace if k is not None] + return stacktrace + + +def main(argv=sys.argv): + parser = argparse.ArgumentParser(argv[0], + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=usage) + parser.add_argument('--verbose', '-v', action='count', default=0) # by default quiet, since we write to stdout + parser.add_argument('--quiet', '-q', action='count', default=0) + parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=False, action='store_true') + parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false') + parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true') + parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false') + parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true') + parser.add_argument('--no-pedantic', dest="pedantic", action='store_false') + parser.add_argument('--input', '-i', help="VizTracer input (default %(default)s)", default="viztracer.json") + parser.add_argument('--output', '-o', dest="output", default=None, help="Output filename (default %(default)s)") + + + args = parser.parse_args(argv[1:]) + verbose = args.verbose - args.quiet + + perf_script_args = ['--no-inline'] + perf_script_args = ' '.join(perf_script_args) + cmd = f"perf script {perf_script_args}" + if verbose >= 2: + print(f"Running: {cmd}") + perf_args = shlex.split(cmd) + perf = subprocess.Popen(perf_args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) #, stdin=subprocess.PIPE) + + if args.output is None: + output = sys.stdout + else: + output = open(args.output, "w") + + if verbose >= 1: + print_stderr("Loading snapshot") + with open(args.input, "r") as f: + json_data = f.read() + snap = ProgSnapshot(json_data) + # find all pids (or tids) + pids = list(snap.func_trees) + for pid in pids.copy(): + pids.extend(list(snap.func_trees[pid])) + t0 = min(event['ts'] for event in json.loads(json_data)['traceEvents']) + + for header, stacktrace in read_events(perf.stdout): + print(header, file=output) + values, _, _ = parse_header(header) + time = values['time'] - t0 + triggerpid = values['triggerpid'] + if triggerpid in pids: + try: + stacktrace = stacktrace_inject(stacktrace, snap, triggerpid, time, keep_cpython_evals=args.keep_cpython_evals, allow_mismatch=args.allow_mismatch, pedantic=args.pedantic) + for call in stacktrace: + print(call, file=output) + except: + print(f"Error for event: {header}") + raise + print(file=output) + diff --git a/setup.py b/setup.py index 9636fdf..9264ad6 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,11 @@ setup( use_scm_version=True, setup_requires=['setuptools_scm'], entry_points={ - 'console_scripts': ['per4m = per4m.__main__:main'] + 'console_scripts': [ + 'per4m = per4m.__main__:main', + 'giltracer = per4m.giltracer:main', + 'perf-pyrecord = per4m.record:main', + 'perf-pyscript = per4m.script:main', + ] } -) \ No newline at end of file +)