Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,13 @@ skewness (should be 0) -0.0071960817771
max, min 5.83625092886 -3.4749002526
m2, m3, m4 999218.191078 -7187.64448532 2993126.28574
```

Profiler
--------
The :mod:`faststat.profiler` module can attach using the standard
``sys.setprofile`` and ``sys.settrace`` hooks to collect per-function or
per-line timing information. Run a script under the profiler using::

python -m faststat.profiler myscript.py

This prints timing statistics for the executed code.
1 change: 1 addition & 0 deletions faststat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# using relative imports so ``import faststat`` works correctly.
from .faststat import *
from .format import *
from .profiler import Profiler, cli as profiler_cli

try:
from importlib.metadata import version as _version, PackageNotFoundError
Expand Down
128 changes: 128 additions & 0 deletions faststat/profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""Lightweight function and line profiler integrated with faststat.

This module provides a :class:`Profiler` that attaches using the standard
``sys.setprofile`` and ``sys.settrace`` hooks. A :class:`faststat.Duration`
instance is maintained for each function or line that executes, allowing
collection of high level timing statistics.
"""

from __future__ import annotations

import argparse
import runpy
import sys
from collections import defaultdict
from types import FrameType
from typing import Callable, Dict, Tuple

from .faststat import Duration, nanotime


class Profiler:
"""Collect execution timing statistics using faststat."""

def __init__(self, *, line_stats: bool = False, func_stats: bool = True):
self.line_stats = line_stats
self.func_stats = func_stats
self._call_start: Dict[FrameType, int] = {}
self._line_info: Dict[FrameType, Tuple[int, int]] = {}
self.func_data: Dict[Tuple[str, str, int], Duration] = defaultdict(
lambda: Duration(interval=False)
)
self.line_data: Dict[Tuple[str, int], Duration] = defaultdict(
lambda: Duration(interval=False)
)

# tracing ---------------------------------------------------------------
def _trace(self, frame: FrameType, event: str, arg):
now = nanotime()
if event == "call":
if self.func_stats:
self._call_start[frame] = now
if self.line_stats:
self._line_info[frame] = (now, frame.f_lineno)
return self._trace
if event == "return":
if self.func_stats and frame in self._call_start:
start = self._call_start.pop(frame)
key = (
frame.f_code.co_filename,
frame.f_code.co_name,
frame.f_code.co_firstlineno,
)
self.func_data[key].end(start)
if self.line_stats and frame in self._line_info:
start, lineno = self._line_info.pop(frame)
self.line_data[(frame.f_code.co_filename, lineno)].end(start)
return self._trace
if event == "line" and self.line_stats:
if frame in self._line_info:
start, lineno = self._line_info[frame]
self.line_data[(frame.f_code.co_filename, lineno)].end(start)
self._line_info[frame] = (now, frame.f_lineno)
return self._trace
return self._trace

# public API -----------------------------------------------------------
def start(self) -> None:
sys.setprofile(self._trace)
if self.line_stats:
sys.settrace(self._trace)

def stop(self) -> None:
sys.setprofile(None)
if self.line_stats:
sys.settrace(None)
self._call_start.clear()
self._line_info.clear()

def report(self, *, limit: int = 20) -> None:
"""Print collected statistics sorted by mean duration."""

if self.func_stats:
print("Function timings:")
for (file, name, lineno), stat in sorted(
self.func_data.items(), key=lambda kv: kv[1].mean, reverse=True
)[:limit]:
print(
f"{name} {file}:{lineno} n={stat.n} mean={stat.mean} max={stat.max}"
)
if self.line_stats:
print("Line timings:")
for (file, lineno), stat in sorted(
self.line_data.items(), key=lambda kv: kv[1].mean, reverse=True
)[:limit]:
print(
f"{file}:{lineno} n={stat.n} mean={stat.mean} max={stat.max}"
)


# CLI ---------------------------------------------------------------------

def cli(argv: list[str] | None = None) -> None:
parser = argparse.ArgumentParser(description="Profile python code using faststat")
parser.add_argument(
"-m",
"--mode",
choices=["function", "line", "both"],
default="function",
help="What to profile",
)
parser.add_argument("script", help="Python script to run")
parser.add_argument("args", nargs=argparse.REMAINDER, help="Arguments for script")
ns = parser.parse_args(argv)

line_stats = ns.mode in ("line", "both")
func_stats = ns.mode in ("function", "both")
prof = Profiler(line_stats=line_stats, func_stats=func_stats)
sys.argv = [ns.script] + ns.args
prof.start()
try:
runpy.run_path(ns.script, run_name="__main__")
finally:
prof.stop()
prof.report()


if __name__ == "__main__": # pragma: no cover - manual usage
cli()
32 changes: 32 additions & 0 deletions test/test_profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import faststat.profiler as profiler


def busy_wait():
for _ in range(1000):
pass


def test_function_stats():
p = profiler.Profiler(func_stats=True, line_stats=False)
p.start()
busy_wait()
p.stop()
key = (
busy_wait.__code__.co_filename,
busy_wait.__name__,
busy_wait.__code__.co_firstlineno,
)
assert key in p.func_data
stat = p.func_data[key]
assert stat.n >= 1
assert stat.mean > 0


def test_line_stats():
p = profiler.Profiler(func_stats=False, line_stats=True)
p.start()
busy_wait()
p.stop()
file = busy_wait.__code__.co_filename
assert any(k[0] == file for k in p.line_data)

Loading