feat: perf-pyrecord and perf-pyscript to inject python stacktraces

This commit is contained in:
Maarten A. Breddels 2020-12-16 13:14:04 +01:00
parent ede531f7d7
commit 93d9e8251f
8 changed files with 438 additions and 88 deletions

View File

@ -7,8 +7,10 @@ optional arguments:
-h, --help show this help message and exit
positional arguments:
perf2trace convert perf.data to TraceEvent json data
giltracer Run VizTracer and perf, and merge the result to see where the GIL is active.
record Run VizTracer and perf simultaneously. See also man perf record.
script Take stacktraces from VizTracer, and inject them in perf script output.
perf2trace Convert perf.data to TraceEvent JSON data.
Examples:
$ perf script --no-inline | per4m -v
@ -26,6 +28,12 @@ def main(args=sys.argv):
elif len(args) > 1 and args[1] == "giltracer":
from .giltracer import main
main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
elif len(args) > 1 and args[1] == "record":
from .record import main
main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
elif len(args) > 1 and args[1] == "script":
from .script import main
main([os.path.basename(args[0]) + " " + args[1]] + args[2:])
else:
print(usage)
sys.exit(0)

15
per4m/example4.py Normal file
View File

@ -0,0 +1,15 @@
import time
values = [('foo', 3), ('bar', 42), ('bazz', 2)]
def key_function(item):
time.sleep(0.01)
name, index = item
return index
def do_sort():
values.sort(key=key_function)
do_sort()
def do_print():
print(values)
do_print()

View File

@ -1,89 +1,32 @@
import argparse
import os
import shlex
import subprocess
import sys
import time
import signal
import viztracer
from viztracer.report_builder import ReportBuilder
import runpy
RETRIES = 10
from .record import PerfRecord
usage = """
Convert perf.data to Trace Event Format.
Run VizTracer and perf simultaneously, then inject the GIL information in the viztracer output
Usage:
$ per4m giltracer -m per4m.example1
$ giltracer -m per4m.example1
"""
class GilTracer:
def __init__(self, perf_output='perf.data', trace_output='giltracer.json', verbose=1):
self.perf_output = perf_output
class GilTracer(PerfRecord):
def __init__(self, output='perf.data', trace_output='giltracer.json', verbose=1):
super().__init__(output=output, verbose=verbose, args=["-e 'sched:*'"])
self.trace_output = trace_output
self.verbose = verbose
def __enter__(self):
pid = os.getpid()
cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.perf_output}"
if self.verbose >= 2:
print(f"Running: {cmd}")
args = shlex.split(cmd)
self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
start_time = time.time()
for _ in range(RETRIES):
if os.path.exists(self.perf_output):
mtime = os.path.getmtime(self.perf_output)
if mtime > start_time:
break
# we need to wait till perf creates the file
time.sleep(0.1)
else:
self.finish()
raise OSError(f'perf did not create {self.perf_output}')
start_size = os.path.getsize(self.perf_output)
for _ in range(RETRIES):
size = os.path.getsize(self.perf_output)
if size > start_size:
break
# we need to wait till perf writes
time.sleep(0.1)
else:
self.finish()
raise OSError(f'perf did not write to {self.perf_output}')
# and give perf a bit more time
time.sleep(0.05)
return self
def finish(self):
self.perf.terminate()
outs, errs = self.perf.communicate(timeout=5)
if self.verbose >= 1:
print(outs.decode('utf8'))
if errs:
if self.verbose >= 1:
print(errs.decode('utf8'))
if self.perf.returncode not in [0, -signal.SIGTERM.value]:
print(signal.SIGTERM)
raise OSError(f'perf record fails, got exit code {self.perf.returncode}')
def __exit__(self, *args):
self.finish()
if self.verbose >= 1:
print('Wait for perf to finish...')
self.perf.wait()
pid = os.getpid()
super().__exit__(self, *args)
verbose = '-q ' + '-v ' * self.verbose
cmd = f"perf script -i {self.perf_output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}"
cmd = f"perf script -i {self.output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}"
if os.system(cmd) != 0:
raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}')

View File

@ -1,25 +1,7 @@
import sys
import json
import argparse
def read_events(input):
first_line = True
stacktrace = []
header = None
for line in sys.stdin:
line = line.strip()
if first_line:
header = line
stacktrace = []
first_line = False
else:
if not line: # done
yield header, stacktrace
first_line = True
continue
stacktrace.append(line)
from .perfutils import read_events
def parse_values(parts, **types):
values = {}
@ -48,7 +30,7 @@ def drops_gil(stacktrace):
usage = """
Convert perf.data to Trace Event Format.
Convert perf.data to TraceEvent JSON data.
Usage:

34
per4m/perfutils.py Normal file
View File

@ -0,0 +1,34 @@
def read_events(input):
first_line = True
stacktrace = []
header = None
for line in input:
line = line.strip()
if first_line:
header = line
stacktrace = []
first_line = False
else:
if not line: # done
yield header, stacktrace
first_line = True
continue
stacktrace.append(line)
if stacktrace:
yield header, stacktrace
def parse_header(header):
parts = header.split()
event = parts[4][:-1] # strip off ':'
if ":" in event: # tracepoint
dso, triggerpid, cpu, time, _, *other = parts
time = float(time[:-1]) * 1e6
values = dict(dso=dso, triggerpid=int(triggerpid), cpu=cpu, time=time)
tracepoint = True
else: # counter etc
dso, triggerpid, time, count, _, *other = parts
time = float(time[:-1]) * 1e6
values = dict(dso=dso, triggerpid=int(triggerpid), count=count, time=time)
tracepoint = False
return values, other, tracepoint

162
per4m/record.py Normal file
View File

@ -0,0 +1,162 @@
import argparse
import contextlib
import os
import runpy
import shlex
import subprocess
import signal
import sys
import time
import viztracer
import numpy
RETRIES = 10
usage = """
Run VizTracer and perf simultaneously. See also man perf record.
Usage:
$ perf-pyrecord -e cycles -m per4m.example2
"""
class PerfRecord:
def __init__(self, output='perf.data', args=[], verbose=1):
self.output = output
self.args = args
self.verbose = verbose
def __enter__(self):
pid = os.getpid()
# cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}"
perf_args = ' '.join(self.args)
cmd = f"perf record {perf_args} --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.output}"
if self.verbose >= 2:
print(f"Running: {cmd}")
args = shlex.split(cmd)
self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
start_time = time.time()
for _ in range(RETRIES):
if os.path.exists(self.output):
mtime = os.path.getmtime(self.output)
if mtime > start_time:
break
# we need to wait till perf creates the file
time.sleep(0.1)
else:
self.finish()
raise OSError(f'perf did not create {self.output}')
start_size = os.path.getsize(self.output)
for _ in range(RETRIES):
size = os.path.getsize(self.output)
if size > start_size:
break
# we need to wait till perf writes
time.sleep(0.1)
else:
self.finish()
raise OSError(f'perf did not write to {self.output}')
# and give perf a bit more time
time.sleep(0.05)
return self
def finish(self):
self.perf.terminate()
outs, errs = self.perf.communicate(timeout=5)
if self.verbose >= 1:
print(outs.decode('utf8'))
if errs:
if self.verbose >= 1:
print(errs.decode('utf8'))
if self.perf.returncode not in [0, -signal.SIGTERM.value]:
print(signal.SIGTERM)
raise OSError(f'perf record fails, got exit code {self.perf.returncode}')
def __exit__(self, *args):
self.finish()
if self.verbose >= 1:
print('Wait for perf to finish...')
self.perf.wait()
@contextlib.contextmanager
def empty_context():
yield
def main(argv=sys.argv):
parser = argparse.ArgumentParser(argv[0],
formatter_class=argparse.RawDescriptionHelpFormatter,
usage=usage)
parser.add_argument('--module', '-m')
parser.add_argument('--import', dest="import_", help="Comma seperated list of modules to import before tracing (cleans up tracing output)")
parser.add_argument('--verbose', '-v', action='count', default=1)
parser.add_argument('--quiet', '-q', action='count', default=0)
parser.add_argument('--output', '-o', dest="output", help="Output filename (default %(default)s)", default='perf.data')
parser.add_argument('--output-viztracer', help="Output filename for viztracer (default %(default)s)", default='viztracer.json')
parser.add_argument('--freq', '-F', help="Profile frequency, passed down to perf record (see man perf record)")
parser.add_argument('--event', '-e', dest='events', help="Select PMU event, passed down to perf record (see man perf record)", action='append', default=[])
parser.add_argument('--tracer_entries', type=int, default=1000000, help="See viztracer --help")
# # these gets passed to stacktraceinject
# parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=True, action='store_true')
# parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false')
# parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true')
# parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false')
# parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true')
# parser.add_argument('--no-pedantic', dest="pedantic", action='store_false')
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args(argv[1:])
verbose = args.verbose - args.quiet
if args.import_:
for module in args.import_.split(','):
if verbose >= 2:
print(f'importing {module}')
__import__(module)
viztracer_path = args.output_viztracer
ctx = contextlib.redirect_stdout(None) if verbose == 0 else empty_context()
perf_args = []
if args.freq:
perf_args.append(f' --freq={args.freq}')
for event in args.events:
perf_args.append(f' -e {event}')
with ctx:
with PerfRecord(verbose=verbose, args=perf_args) as perf:
with viztracer.VizTracer(output_file=viztracer_path, verbose=verbose, tracer_entries=args.tracer_entries):
if args.module:
runpy.run_module(args.module)
else:
sys.argv = args.args
runpy.run_path(sys.argv[0])
# forward = ""
# if args.keep_cpython_evals:
# forward += ' --keep-cpython-evals'
# else:
# forward += ' --no-keep-cpython-evals'
# if args.allow_mismatch:
# forward += ' --allow-mismatch'
# else:
# forward += ' --no-allow-mismatch'
# if args.pedantic:
# forward += ' --pedantic'
# else:
# forward += ' --no-pedantic'
# if args.output:
# forward += ' --output=args.output'
# cmd = f"perf script -i {perf.perf_output} --no-inline | per4m stacktraceinject {forward} --input {viztracer_path}"
# if os.system(cmd) != 0:
# raise OSError(f'Failed to run perf or per4m perf2trace, command:\n$ {cmd}')
if __name__ == '__main__':
main()

201
per4m/script.py Normal file
View File

@ -0,0 +1,201 @@
import argparse
import json
import shlex
import subprocess
import sys
from viztracer.prog_snapshot import ProgSnapshot
from .perfutils import read_events, parse_header
usage = """
Take stacktraces from VizTracer, and inject them in perf script output
Usage:
$ perf-pyscript | per4m stacktraceinject --input viztracer.json
"""
def print_stderr(*args):
print(*args, file=sys.stderr)
def stacktrace_inject(stacktrace, snapshot, pid, time, keep_cpython_evals=False, perror=print_stderr, allow_mismatch=False, pedantic=False):
# we'll do in place modification
stacktrace = stacktrace.copy()
# First, we build op parts of the callstack from VizTracers
# We can go into and out of Python/C several times
pystacktraces = [[]]
pystacktrace_part = pystacktraces[-1]
snapshot.goto_tid(pid)
snapshot.goto_timestamp(time)
bottom_frame = frame = snapshot.curr_frame
was_in_python = frame.node.is_python
while frame:
node = frame.node
# output should become sth like: ffffffff9700008c entry_SYSCALL_64_after_hwframe+0x44 ([kernel.kallsyms])
filename = node.filename
if filename is None:
location = ''
else:
if 'frozen' in filename:
filename = '<frozen>'
location = f"::{filename}:{node.lineno}"
funcname = node.funcname or node.fullname
if node.is_python:
type = 'py'
else:
type = 'cext'
# lineno = f':{node.lineno}' if node.lineno is not None else ''
if node.is_python:
if not was_in_python:
# this is a jump back to into c, which we allow
# pystacktraces.append([])
# pystacktrace_part = pystacktraces[-1]
pass
else:
if was_in_python:
# this is a jump into Python
pystacktraces.append([])
pystacktrace_part = pystacktraces[-1]
pystacktrace_part.append(f'000000000000000000000000 {type}::{funcname}{location} ([{filename}])')
was_in_python = node.is_python
frame = frame.parent
# Next, we find where in the perf output, we were in the Python evaluate loop (multiple places possible)
# A list of (index_min, index_max) where the Python stacktraces should be injected/replaced
pyslices = []
index_min = None
index_max = None
is_py = [False] * len(stacktrace)
# The C Part of Python callstack should begin with a function with this name in it
begin_functions = ['PyEval_Eval']
# and all function call after that may be one that contains these names
continue_functions = ['PyEval_Eval', 'PyFunction_FastCall', 'function_code_fastcall', 'method_call', 'call_function', 'PyCFunction_FastCall', 'PyObject_FastCall']
# we could make the code simpler by flipping the stacktrace, so the bottom of the stack starts at index 0
# (instead of reversed)
for i, call in enumerate(stacktrace[::-1]):
i = len(stacktrace) - i - 1
# Or we found the beginning, or a continuation of the evaluation loop
if (index_max is None and any(sig in call for sig in begin_functions)) or\
(index_max is not None and any(sig in call for sig in continue_functions)):
if index_max is None:
index_max = i
index_min = i
else:
if index_max is not None:
pyslices.append((index_min, index_max))
is_py[index_min:index_max] = [True] * (index_max - index_min)
index_min = index_max = None
if index_max is not None:
pyslices.append((index_min, index_max))
# Lets give some nice output in case it does not match up
if len(pystacktraces) != len(pyslices):
if not pedantic:
# for these we know that we often get a mismatch, and they seem reasonable to ignore
known_failures = ['pythread_wrapper', 'switch_fpu_return', 'do_fork', 'ret_from_fork', 'start_thread']
for known_failure in known_failures:
if any(known_failure in call for call in stacktrace) and not any('_Py' in call for call in stacktrace):
perror(f'Seen {known_failure} in stacktrace during, silently ignoring the stack mismatch, use --no-pedantic to not ignore this')
return stacktrace
perror(f"OOOOPS, could not match stacktraces at time {time} for pid {pid}!")
perror("Where VizTracer thinks we are:")
perror(bottom_frame.node.fullname)
try:
bottom_frame.show(perror)
except:
perror('Oops, viztracer fails showing')
# snap.where()
perror("Callstack slices where we think we should inject the Python callstack:")
for pyslice in pyslices:
perror(pyslice)
perror("Callstack from perf, where * indicates where we think the Python eval loop is:")
for i, call in enumerate(stacktrace):
perror(i, '*' if is_py[i] else ' ', call)
perror("Python call stacks, as found from VizTracer:")
for pystacktraces_part in pystacktraces:
perror('-----')
for i, call in enumerate(pystacktraces_part):
perror(call)
perror('-----')
if not allow_mismatch:
raise ValueError('Stack traces from VizTracer and perf could not be matched, run with --allow-mismatch and inspect output carefully')
else:
for pyslice, pystacktraces_part in zip(pyslices, reversed(pystacktraces)):
index_min, index_max = pyslice
if keep_cpython_evals:
for i in range(index_min, index_max+1):
ptr, call = stacktrace[i].split(' ', 1)
stacktrace[i] = f'{ptr} cpyeval::{call}'
else:
# mark for delection, to avoid indices getting messed up
stacktrace[index_min:index_max+1] = [None] * (index_max - index_min + 1)
for pystack in reversed(pystacktraces_part):
stacktrace.insert(index_min, pystack)
stacktrace = [k for k in stacktrace if k is not None]
return stacktrace
def main(argv=sys.argv):
parser = argparse.ArgumentParser(argv[0],
formatter_class=argparse.RawDescriptionHelpFormatter,
usage=usage)
parser.add_argument('--verbose', '-v', action='count', default=0) # by default quiet, since we write to stdout
parser.add_argument('--quiet', '-q', action='count', default=0)
parser.add_argument('--keep-cpython-evals', help="keep CPython evaluation stacktraces (instead of replacing) (default: %(default)s)", default=False, action='store_true')
parser.add_argument('--no-keep-cpython-evals', dest="keep_cpython_evals", action='store_false')
parser.add_argument('--allow-mismatch', help="Keep going even when we cannot match the C and Python stacktrace (default: %(default)s)", default=False, action='store_true')
parser.add_argument('--no-allow-mismatch', dest="allow_mismatch", action='store_false')
parser.add_argument('--pedantic', help="If false, accept known stack mismatch issues (default: %(default)s)", default=False, action='store_true')
parser.add_argument('--no-pedantic', dest="pedantic", action='store_false')
parser.add_argument('--input', '-i', help="VizTracer input (default %(default)s)", default="viztracer.json")
parser.add_argument('--output', '-o', dest="output", default=None, help="Output filename (default %(default)s)")
args = parser.parse_args(argv[1:])
verbose = args.verbose - args.quiet
perf_script_args = ['--no-inline']
perf_script_args = ' '.join(perf_script_args)
cmd = f"perf script {perf_script_args}"
if verbose >= 2:
print(f"Running: {cmd}")
perf_args = shlex.split(cmd)
perf = subprocess.Popen(perf_args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) #, stdin=subprocess.PIPE)
if args.output is None:
output = sys.stdout
else:
output = open(args.output, "w")
if verbose >= 1:
print_stderr("Loading snapshot")
with open(args.input, "r") as f:
json_data = f.read()
snap = ProgSnapshot(json_data)
# find all pids (or tids)
pids = list(snap.func_trees)
for pid in pids.copy():
pids.extend(list(snap.func_trees[pid]))
t0 = min(event['ts'] for event in json.loads(json_data)['traceEvents'])
for header, stacktrace in read_events(perf.stdout):
print(header, file=output)
values, _, _ = parse_header(header)
time = values['time'] - t0
triggerpid = values['triggerpid']
if triggerpid in pids:
try:
stacktrace = stacktrace_inject(stacktrace, snap, triggerpid, time, keep_cpython_evals=args.keep_cpython_evals, allow_mismatch=args.allow_mismatch, pedantic=args.pedantic)
for call in stacktrace:
print(call, file=output)
except:
print(f"Error for event: {header}")
raise
print(file=output)

View File

@ -16,6 +16,11 @@ setup(
use_scm_version=True,
setup_requires=['setuptools_scm'],
entry_points={
'console_scripts': ['per4m = per4m.__main__:main']
'console_scripts': [
'per4m = per4m.__main__:main',
'giltracer = per4m.giltracer:main',
'perf-pyrecord = per4m.record:main',
'perf-pyscript = per4m.script:main',
]
}
)