diff --git a/README.md b/README.md index 0a17256..08be880 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,18 @@ From PyPy $ pip install per4m -# Demo +# Usage + + $ per4m giltracer -m per4m.example2 + +Open the result.html, and identify the problem (GIL visible, possible low instruction counts/cycle): + + +![image](https://user-images.githubusercontent.com/1765949/102187104-db0c0c00-3eb3-11eb-93ef-e6d938d9e349.png) + + +The dark red `S(GIL)` blocks indicate the threads/processes are in a waiting state due to the GIL, dark orange `S` is a due to other reasons (like `time.sleep(...)`). The regular pattern is due to Python switching threads after [`sys.getswitchinterval`](https://docs.python.org/3/library/sys.html#sys.getswitchinterval) (0.005 seconds) +# Usage - manual ## Step 1 Create a script that uses viztracer to store trace information: @@ -63,13 +74,4 @@ Merge the viztracer and perf/per4m results into a single html file. $ viztracer --combine example1.json example1perf.json -o example1.html -## Step 5 - -Identify the problem (GIL visible, possible low instruction counts/cycle): - - -![image](https://user-images.githubusercontent.com/1765949/102187104-db0c0c00-3eb3-11eb-93ef-e6d938d9e349.png) - - -The dark red `S(GIL)` blocks indicate the threads/processes are in a waiting state due to the GIL, dark orange `S` is a due to other reasons (like `time.sleep(...)`). The regular pattern is due to Python switching threads after [`sys.getswitchinterval`](https://docs.python.org/3/library/sys.html#sys.getswitchinterval) (0.005 seconds) diff --git a/per4m/__main__.py b/per4m/__main__.py index 036a12c..25f179e 100644 --- a/per4m/__main__.py +++ b/per4m/__main__.py @@ -8,7 +8,7 @@ optional arguments: positional arguments: perf2trace convert perf.data to TraceEvent json data - ... anything else will start up the gui, see usage below + giltracer Run VizTracer and perf, and merge the result to see where the GIL is active. Examples: $ perf script --no-inline | per4m -v @@ -23,6 +23,9 @@ def main(args=sys.argv): elif len(args) > 1 and args[1] == "perf2trace": from .perf2trace import main main([os.path.basename(args[0]) + " " + args[1]] + args[2:]) + elif len(args) > 1 and args[1] == "giltracer": + from .giltracer import main + main([os.path.basename(args[0]) + " " + args[1]] + args[2:]) else: print(usage) sys.exit(0) diff --git a/per4m/giltracer.py b/per4m/giltracer.py new file mode 100644 index 0000000..3ff6d9e --- /dev/null +++ b/per4m/giltracer.py @@ -0,0 +1,109 @@ +import argparse +import os +import shlex +import subprocess +import sys +import time +import signal + + +import viztracer +from viztracer.report_builder import ReportBuilder + +import runpy + + +RETRIES = 10 + + +usage = """ + +Convert perf.data to Trace Event Format. + +Usage: + +$ per4m giltracer -m per4m.example1 +""" + +class GilTracer: + def __init__(self, perf_output='perf.data', trace_output='giltracer.json', verbose=1): + self.perf_output = perf_output + self.trace_output = trace_output + self.verbose = verbose + + def __enter__(self): + pid = os.getpid() + cmd = f"perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC --pid {pid} -o {self.perf_output}" + args = shlex.split(cmd) + self.perf = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + start_time = time.time() + for _ in range(RETRIES): + if os.path.exists(self.perf_output): + mtime = os.path.getmtime(self.perf_output) + if mtime > start_time: + break + # we need to wait till perf creates the file + time.sleep(0.1) + else: + raise OSError(f'perf did not create {self.perf_output}') + start_size = os.path.getsize(self.perf_output) + for _ in range(RETRIES): + size = os.path.getsize(self.perf_output) + if size > start_size: + break + # we need to wait till perf writes + time.sleep(0.1) + else: + raise OSError(f'perf did not write to {self.perf_output}') + # and give perf a bit more time + time.sleep(0.05) + return self + + def __exit__(self, *args): + self.perf.terminate() + outs, errs = self.perf.communicate(timeout=5) + if self.verbose >= 1: + print(outs.decode('utf8')) + if errs: + if self.verbose >= 1: + print(errs.decode('utf8')) + if self.perf.returncode not in [0, -signal.SIGTERM.value]: + print(signal.SIGTERM) + raise OSError(f'perf record fails, got exit code {self.perf.returncode}') + # import pdb; pdb.set_trace() + if self.verbose >= 1: + print('Wait for perf to finish...') + self.perf.wait() + + pid = os.getpid() + verbose = '-q ' + '-v ' * self.verbose + cmd = f"perf script -i {self.perf_output} --no-inline | per4m perf2trace -o {self.trace_output} {verbose}" + if os.system(cmd) != 0: + raise OSError('Failed to run perf or per4m perf2trace') + + +def main(argv=sys.argv): + parser = argparse.ArgumentParser(argv[0], + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=usage) + parser.add_argument('--module', '-m') + parser.add_argument('--verbose', '-v', action='count', default=1) + parser.add_argument('--quiet', '-q', action='count', default=0) + parser.add_argument('--output', '-o', dest="output", default='result.html', help="Output filename (default %(default)s)") + parser.add_argument('args', nargs=argparse.REMAINDER) + args = parser.parse_args(argv[1:]) + verbose = args.verbose - args.quiet + + with GilTracer(verbose=verbose) as gt: + with viztracer.VizTracer(output_file="viztracer.json", verbose=verbose): + if args.module: + runpy.run_module(args.module) + else: + sys.argv = args.args + runpy.run_path(sys.argv[0]) + builder = ReportBuilder(['./viztracer.json', './giltracer.json'], verbose=verbose) + builder.save(output_file=args.output) + + +if __name__ == '__main__': + main() diff --git a/per4m/perf2trace.py b/per4m/perf2trace.py index ccde571..cc0b727 100644 --- a/per4m/perf2trace.py +++ b/per4m/perf2trace.py @@ -63,7 +63,8 @@ def main(argv=sys.argv): parser = argparse.ArgumentParser(argv[0], formatter_class=argparse.RawDescriptionHelpFormatter, usage=usage) - parser.add_argument('--verbose', '-v', action='count', default=0) + parser.add_argument('--verbose', '-v', action='count', default=1) + parser.add_argument('--quiet', '-q', action='count', default=0) parser.add_argument('--sleeping', help="store sleeping phase (default: %(default)s)", default=True, action='store_true') parser.add_argument('--no-sleeping', dest="sleeping", action='store_false') parser.add_argument('--running', help="show running phase (default: %(default)s)", default=False, action='store_true') @@ -84,7 +85,7 @@ def main(argv=sys.argv): last_run_time = {} last_sleep_time = {} last_sleep_stacktrace = {} - verbose = args.verbose + verbose = args.verbose - args.quiet time_first = None store_runing = args.running store_sleeping = args.sleeping @@ -132,7 +133,7 @@ def main(argv=sys.argv): # raise ValueError(f'pid {pid} not seen running before, only {last_run_time}') continue dur = time - last_run_time[pid] - if verbose: + if verbose >= 2: log(f'{name} will switch to state={values["prev_state"]}, ran for {dur}') if store_runing: event = {"pid": parent_pid.get(pid, pid), "tid": pid, "ts": last_run_time[pid], "dur": dur, "name": 'R', "ph": "X", "cat": "process state"} @@ -157,7 +158,7 @@ def main(argv=sys.argv): # q last_run_time[pid] = time continue - if verbose: + if verbose >= 2: name = pid_names.get(pid, pid) log(f'Waking up {name}') if store_sleeping: @@ -173,14 +174,14 @@ def main(argv=sys.argv): last_run_time[pid] = time del last_sleep_time[pid] elif event == "sched:sched_process_exec": - if verbose: + if verbose >= 2: name = pid_names.get(triggerpid, triggerpid) log(f'Starting (exec) {name}') elif event == "sched:sched_wakeup_new": # e.g: swapper 0 [040] 3498299.642199: sched:sched_waking: comm=python pid=393320 prio=120 target_cpu=040 values = parse_values(parts, pid=int) pid = values['pid'] - if verbose: + if verbose >= 2: name = pid_names.get(pid, pid) log(f'Starting (new) {name}') last_run_time[pid] = time @@ -188,7 +189,7 @@ def main(argv=sys.argv): values = parse_values(parts, pid=int, child_pid=int) # set up a child parent relationship for better visualization pid, child_pid = values['pid'], values['child_pid'] - if verbose: + if verbose >= 2: log(f'Process {pid} forked {child_pid}') parent_pid[child_pid] = pid elif not tracepoint: @@ -206,7 +207,8 @@ def main(argv=sys.argv): with open(args.output, 'w') as f: json.dump({'traceEvents': trace_events}, f) - print(f"Wrote to {args.output}") + if verbose >= 1: + print(f"Wrote to {args.output}") if __name__ == '__main__':