initial commit

This commit is contained in:
Maarten A. Breddels 2020-12-14 20:32:17 +01:00
commit 886b930f0e
6 changed files with 446 additions and 0 deletions

134
.gitignore vendored Normal file
View File

@ -0,0 +1,134 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
*.json
*.html
*.data
*.data.old

70
README.md Normal file
View File

@ -0,0 +1,70 @@
# Profiling and tracing information for Python using viztracer and perf, the GIL exposed.
This project aims to (at least demonstrate) how to combine the Linux perf tool (aka perf_events) with [viztracer](https://github.com/gaogaotiantian/viztracer) to visualize the GIL (rather process states) and various profiling statistics, or hardware performance counters.
# Installation
From PyPI:
$ pip install per4m
# Demo
## Step 1
Create a script that uses viztracer to store trace information:
```python[example1.py]
import threading
import time
import viztracer
import time
def run():
total = 0
for i in range(1_000_000):
total += i
return total
with viztracer.VizTracer(output_file="example1.json"):
thread1 = threading.Thread(target=run)
thread2 = threading.Thread(target=run)
thread1.start()
thread2.start()
time.sleep(0.2)
for thread in [thread1, thread2]:
thread.join()
```
## Step 2
Run and trace scheduler events from the kernel (to capture GIL information) and measure hardware performance counters
```
$ perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC -e L1-dcache-load-misses -e instructions -e cycles -e page-faults -- python -m per4m.example1
Loading finish
Saving report to /home/maartenbreddels/github/maartenbreddels/per4m/example1.json ...
Dumping trace data to json, total entries: 76, estimated json file size: 8.9KiB
Report saved.
[ perf record: Woken up 139 times to write data ]
[ perf record: Captured and wrote 26,139 MB perf.data (3302 samples) ]
```
## Step 3
Convert `perf.data` to [Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/)
$ perf script --no-inline | per4m --no-running -o example1perf.json
## Step 4
Merge the viztracer and perf/per4m results into a single html file.
$ viztracer --combine example1.json example1perf.json -o example1.html
## Step 5
Identify the problem (GIL visible, possible low instruction counts/cycle):

7
per4m/__init__.py Normal file
View File

@ -0,0 +1,7 @@
from pkg_resources import get_distribution, DistributionNotFound

# Expose the installed distribution's version as per4m.__version__.
# When running from a plain source checkout the distribution metadata is
# absent and __version__ is simply left undefined.
try:
    dist = get_distribution("per4m")
except DistributionNotFound:
    # package is not installed
    pass
else:
    __version__ = dist.version

193
per4m/__main__.py Normal file
View File

@ -0,0 +1,193 @@
import sys
import json
import argparse
def read_events(input):
    """Parse a ``perf script`` output stream into events.

    The stream consists of records separated by blank lines: the first
    line of a record is the event header, the following lines are the
    stack trace.

    Parameters
    ----------
    input : iterable of str
        Line source, e.g. a file object or ``sys.stdin``.

    Yields
    ------
    tuple
        ``(header, stacktrace)`` where ``header`` is the stripped header
        line and ``stacktrace`` is a list of stripped stack lines.
    """
    header = None
    stacktrace = []
    expect_header = True
    # BUG FIX: the original iterated sys.stdin unconditionally, silently
    # ignoring the `input` argument; iterate the argument instead.
    for line in input:
        line = line.strip()
        if expect_header:
            header = line
            stacktrace = []
            expect_header = False
        elif not line:
            # blank line terminates the current record
            yield header, stacktrace
            expect_header = True
        else:
            stacktrace.append(line)
    if not expect_header:
        # stream ended without a trailing blank line: emit the last record
        # (the original silently dropped it)
        yield header, stacktrace
def parse_values(parts, **types):
    """Extract ``key=value`` tokens from *parts* into a dict.

    Parameters
    ----------
    parts : iterable of str
        Whitespace-split tokens of a perf event header line; tokens
        without ``=`` are ignored.
    **types
        Keys that must be present, mapped to a converter applied to the
        value (e.g. ``pid=int``). Keys not listed stay as ``str``.

    Returns
    -------
    dict
        Mapping of every ``key=value`` token found.

    Raises
    ------
    ValueError
        If a key named in *types* was not found in *parts*.
    """
    values = {}
    for part in parts:
        if "=" in part:
            key, value = part.split('=', 1)
            value = types.get(key, str)(value)
            values[key] = value
    for key in types:
        if key not in values:
            raise ValueError(f'Expected to find key {key} in {parts}')
    # BUG FIX: the original fell off the end and returned None, which made
    # every caller's `values[...]` subscript fail.
    return values
# Help text shown by the argument parser (RawDescriptionHelpFormatter keeps
# the layout verbatim).
usage = """
Convert perf results to Trace Event Format.
Usage:
Always run perf with -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC, the rest of the events are extra
$ perf record -e 'sched:*' --call-graph dwarf -k CLOCK_MONOTONIC -e L1-dcache-load-misses -e instructions -e cycles -e page-faults -- python -m per4m.example1
Run with --no-inline, otherwise it may run slow
$ perf script --no-inline | per4m --no-running -o example1perf.json
$ viztracer --combine example1.json example1perf.json -o example1.html
"""
def main(argv=sys.argv):
    """Convert ``perf script`` output (read from stdin) into a Trace Event
    Format JSON file that viztracer can combine with its own trace.

    Emits, per pid, 'R' (running) and 'S' (sleeping) duration events derived
    from the kernel scheduler tracepoints, plus Counter events for any
    non-tracepoint samples (instructions, cycles, ...).

    Parameters
    ----------
    argv : list of str
        Command line arguments; defaults to ``sys.argv``.
    """
    # BUG FIX: the original immediately re-read sys.argv here, clobbering
    # the argv parameter and making it impossible to call main() with
    # custom arguments.
    parser = argparse.ArgumentParser(argv[0],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=usage)
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--sleeping', help="store sleeping phase (default: %(default)s)", default=True, action='store_true')
    parser.add_argument('--no-sleeping', dest="sleeping", action='store_false')
    parser.add_argument('--running', help="show running phase (default: %(default)s)", default=True, action='store_true')
    parser.add_argument('--no-running', dest="running", action='store_false')
    parser.add_argument('--all-tracepoints', help="store all tracepoints phase (default: %(default)s)", default=False, action='store_true')
    parser.add_argument('--output', '-o', dest="output", default='perf.json', help="Output filename (default %(default)s)")
    args = parser.parse_args(argv[1:])

    trace_events = []
    # useful for debugging, to have the pids a name
    pid_names = {}
    # pid_names = {872068: "main", 872070: "t1", 872071: "t2"}
    # a bit pedantic to keep these separated
    last_run_time = {}    # pid -> timestamp (us) it was last scheduled in
    last_sleep_time = {}  # pid -> timestamp (us) it was last scheduled out
    verbose = args.verbose
    time_first = None     # timestamp of the first event, for relative logging
    store_running = args.running
    store_sleeping = args.sleeping
    parent_pid = {}  # maps pid/tid to the parent, for grouping in the UI
    count = None     # last counter value seen (non-tracepoint samples)
    for header, stacktrace in read_events(sys.stdin):
        try:
            if args.verbose >= 3:
                print(header)
            pid = None
            parts = header.split()
            # e.g.: python 302629 [011] 3485124.180312: sched:sched_switch: prev_comm=python prev_pid=302629 prev_prio=120 prev_state=S ==> next_comm=swapper/11 next_pid=0 next_prio=120
            event = parts[4][:-1]  # strip off ':'
            if ":" in event:  # tracepoint
                dso, triggerpid, cpu, time, _, *other = parts
                tracepoint = True
            else:  # counter etc
                dso, triggerpid, time, count, _, *other = parts
                tracepoint = False
            triggerpid = int(triggerpid)
            # perf reports seconds; convert to microseconds (Trace Event unit)
            time = float(time[:-1]) * 1e6
            if time_first is None:
                time_first = time
            def log(*args, time=time/1e6):
                offset = time - time_first/1e6
                print(f"{time:13.6f}[+{offset:5.4f}]", *args)
            if args.all_tracepoints and tracepoint:
                # BUG FIX: the original used `pid` here, which is always None
                # at this point (assigned just above); use the triggering pid.
                trace_events.append({'name': event, 'pid': parent_pid.get(triggerpid, triggerpid), 'tid': triggerpid, 'ts': time, 'ph': 'i', 's': 'g'})
            if event == "sched:sched_switch":
                # e.g. python 393320 [040] 3498299.441431: sched:sched_switch: prev_comm=python prev_pid=393320 prev_prio=120 prev_state=S ==> next_comm=swapper/40 next_pid=0 next_prio=120
                values = parse_values(parts, prev_pid=int)
                # we are going to sleep?
                pid = values['prev_pid']
                if values['prev_state'] == 'R':
                    # this happens when a process just started, so we just set the start time
                    last_sleep_time[pid] = time
                    continue
                name = pid_names.get(pid, pid)
                if pid not in last_run_time:
                    # not seen running before (we missed its start); skip
                    continue
                dur = time - last_run_time[pid]
                if verbose:
                    log(f'{name} will switch to state={values["prev_state"]}, ran for {dur}')
                if store_running:
                    event = {"pid": parent_pid.get(pid, pid), "tid": pid, "ts": last_run_time[pid], "dur": dur, "name": 'R', "ph": "X", "cat": "process state"}
                    trace_events.append(event)
                last_sleep_time[pid] = time
                del last_run_time[pid]
            elif event == "sched:sched_wakeup":
                # e.g: swapper 0 [040] 3498299.642199: sched:sched_waking: comm=python pid=393320 prio=120 target_cpu=040
                values = parse_values(parts, pid=int)
                pid = values['pid']
                comm = values['comm']  # raises KeyError if absent; kept as a sanity check
                if pid not in last_sleep_time:
                    # this can happen when we did not see the creation
                    last_run_time[pid] = time
                    continue
                if verbose:
                    name = pid_names.get(pid, pid)
                    log(f'Waking up {name}')
                if store_sleeping:
                    dur = time - last_sleep_time[pid]
                    event = {"pid": parent_pid.get(pid, pid), "tid": pid, "ts": last_sleep_time[pid], "dur": dur, "name": "S", "ph": "X", "cat": "process state"}
                    trace_events.append(event)
                last_run_time[pid] = time
                del last_sleep_time[pid]
            elif event == "sched:sched_process_exec":
                if verbose:
                    name = pid_names.get(triggerpid, triggerpid)
                    log(f'Starting (exec) {name}')
            elif event == "sched:sched_wakeup_new":
                # e.g: swapper 0 [040] 3498299.642199: sched:sched_waking: comm=python pid=393320 prio=120 target_cpu=040
                values = parse_values(parts, pid=int)
                pid = values['pid']
                if verbose:
                    name = pid_names.get(pid, pid)
                    log(f'Starting (new) {name}')
                last_run_time[pid] = time
            elif event == "sched:sched_process_fork":
                values = parse_values(parts, pid=int, child_pid=int)
                # set up a child parent relationship for better visualization
                pid, child_pid = values['pid'], values['child_pid']
                if verbose:
                    log(f'Process {pid} forked {child_pid}')
                parent_pid[child_pid] = pid
            elif not tracepoint:
                # counter sample (instructions, cycles, ...): Counter event
                event = {"pid": 'counters', "ts": time, "name": event, "ph": "C", "args": {event: count}}
                trace_events.append(event)
            else:
                if verbose >= 2:
                    print("SKIP", header)
        except BrokenPipeError:
            break
        except BaseException:
            # print context before re-raising so the offending line is known
            print("error on line", header)
            raise
    with open(args.output, 'w') as f:
        json.dump({'traceEvents': trace_events}, f)
    print(f"Wrote to {args.output}")


if __name__ == '__main__':
    main()

21
per4m/example1.py Normal file
View File

@ -0,0 +1,21 @@
import threading
import time

# BUG FIX: `import time` appeared twice in the original; deduplicated.
import viztracer


def run():
    """Sum the first million integers: pure-Python CPU work that holds the GIL."""
    total = 0
    for i in range(1_000_000):
        total += i
    return total


# Trace two threads doing CPU-bound work concurrently; because of the GIL
# they cannot run Python bytecode in parallel, which the trace will show.
with viztracer.VizTracer(output_file="example1.json"):
    thread1 = threading.Thread(target=run)
    thread2 = threading.Thread(target=run)
    thread1.start()
    thread2.start()
    time.sleep(0.2)
    for thread in [thread1, thread2]:
        thread.join()

21
setup.py Normal file
View File

@ -0,0 +1,21 @@
from setuptools import setup, find_packages

# Package metadata.
author = "Maarten A. Breddels"
author_email = "maartenbreddels@gmail.com"
license = 'MIT'
url = 'https://www.github.com/maartenbreddels/per4m'

setup(
    name='per4m',
    description="Profiling and tracing information for Python using viztracer and perf, the GIL exposed.",
    author=author,
    author_email=author_email,
    url=url,
    license=license,
    packages=['per4m'],
    # Version is derived from git tags via setuptools_scm.
    use_scm_version=True,
    setup_requires=['setuptools_scm'],
    # Install the `per4m` console command pointing at the CLI entry point.
    entry_points={
        'console_scripts': ['per4m = per4m.__main__:main']
    },
)