diff --git a/README.md b/README.md index 412a697..860d685 100644 --- a/README.md +++ b/README.md @@ -42,3 +42,13 @@ skewness (should be 0) -0.0071960817771 max, min 5.83625092886 -3.4749002526 m2, m3, m4 999218.191078 -7187.64448532 2993126.28574 ``` + +Profiler +-------- +The :mod:`faststat.profiler` module can attach using the standard +``sys.setprofile`` and ``sys.settrace`` hooks to collect per-function or +per-line timing information. Run a script under the profiler using:: + + python -m faststat.profiler myscript.py + +This prints timing statistics for the executed code. diff --git a/faststat/__init__.py b/faststat/__init__.py index 1cf4da8..9a73d34 100644 --- a/faststat/__init__.py +++ b/faststat/__init__.py @@ -2,6 +2,7 @@ # using relative imports so ``import faststat`` works correctly. from .faststat import * from .format import * +from .profiler import Profiler, cli as profiler_cli try: from importlib.metadata import version as _version, PackageNotFoundError diff --git a/faststat/profiler.py b/faststat/profiler.py new file mode 100644 index 0000000..bf92274 --- /dev/null +++ b/faststat/profiler.py @@ -0,0 +1,128 @@ +"""Lightweight function and line profiler integrated with faststat. + +This module provides a :class:`Profiler` that attaches using the standard +``sys.setprofile`` and ``sys.settrace`` hooks. A :class:`faststat.Duration` +instance is maintained for each function or line that executes, allowing +collection of high level timing statistics. +""" + +from __future__ import annotations + +import argparse +import runpy +import sys +from collections import defaultdict +from types import FrameType +from typing import Callable, Dict, Tuple + +from .faststat import Duration, nanotime + + +class Profiler: + """Collect execution timing statistics using faststat.""" + + def __init__(self, *, line_stats: bool = False, func_stats: bool = True): + self.line_stats = line_stats + self.func_stats = func_stats + self._call_start: Dict[FrameType, int] = {} + self._line_info: Dict[FrameType, Tuple[int, int]] = {} + self.func_data: Dict[Tuple[str, str, int], Duration] = defaultdict( + lambda: Duration(interval=False) + ) + self.line_data: Dict[Tuple[str, int], Duration] = defaultdict( + lambda: Duration(interval=False) + ) + + # tracing --------------------------------------------------------------- + def _trace(self, frame: FrameType, event: str, arg): + now = nanotime() + if event == "call": + if self.func_stats: + self._call_start[frame] = now + if self.line_stats: + self._line_info[frame] = (now, frame.f_lineno) + return self._trace + if event == "return": + if self.func_stats and frame in self._call_start: + start = self._call_start.pop(frame) + key = ( + frame.f_code.co_filename, + frame.f_code.co_name, + frame.f_code.co_firstlineno, + ) + self.func_data[key].end(start) + if self.line_stats and frame in self._line_info: + start, lineno = self._line_info.pop(frame) + self.line_data[(frame.f_code.co_filename, lineno)].end(start) + return self._trace + if event == "line" and self.line_stats: + if frame in self._line_info: + start, lineno = self._line_info[frame] + self.line_data[(frame.f_code.co_filename, lineno)].end(start) + self._line_info[frame] = (now, frame.f_lineno) + return self._trace + return self._trace + + # public API ----------------------------------------------------------- + def start(self) -> None: + sys.setprofile(self._trace) + if self.line_stats: + sys.settrace(self._trace) + + def stop(self) -> None: + sys.setprofile(None) + if self.line_stats: + sys.settrace(None) + self._call_start.clear() + self._line_info.clear() + + def report(self, *, limit: int = 20) -> None: + """Print collected statistics sorted by mean duration.""" + + if self.func_stats: + print("Function timings:") + for (file, name, lineno), stat in sorted( + self.func_data.items(), key=lambda kv: kv[1].mean, reverse=True + )[:limit]: + print( + f"{name} {file}:{lineno} n={stat.n} mean={stat.mean} max={stat.max}" + ) + if self.line_stats: + print("Line timings:") + for (file, lineno), stat in sorted( + self.line_data.items(), key=lambda kv: kv[1].mean, reverse=True + )[:limit]: + print( + f"{file}:{lineno} n={stat.n} mean={stat.mean} max={stat.max}" + ) + + +# CLI --------------------------------------------------------------------- + +def cli(argv: list[str] | None = None) -> None: + parser = argparse.ArgumentParser(description="Profile python code using faststat") + parser.add_argument( + "-m", + "--mode", + choices=["function", "line", "both"], + default="function", + help="What to profile", + ) + parser.add_argument("script", help="Python script to run") + parser.add_argument("args", nargs=argparse.REMAINDER, help="Arguments for script") + ns = parser.parse_args(argv) + + line_stats = ns.mode in ("line", "both") + func_stats = ns.mode in ("function", "both") + prof = Profiler(line_stats=line_stats, func_stats=func_stats) + sys.argv = [ns.script] + ns.args + prof.start() + try: + runpy.run_path(ns.script, run_name="__main__") + finally: + prof.stop() + prof.report() + + +if __name__ == "__main__": # pragma: no cover - manual usage + cli() diff --git a/test/test_profiler.py b/test/test_profiler.py new file mode 100644 index 0000000..5867c39 --- /dev/null +++ b/test/test_profiler.py @@ -0,0 +1,32 @@ +import faststat.profiler as profiler + + +def busy_wait(): + for _ in range(1000): + pass + + +def test_function_stats(): + p = profiler.Profiler(func_stats=True, line_stats=False) + p.start() + busy_wait() + p.stop() + key = ( + busy_wait.__code__.co_filename, + busy_wait.__name__, + busy_wait.__code__.co_firstlineno, + ) + assert key in p.func_data + stat = p.func_data[key] + assert stat.n >= 1 + assert stat.mean > 0 + + +def test_line_stats(): + p = profiler.Profiler(func_stats=False, line_stats=True) + p.start() + busy_wait() + p.stop() + file = busy_wait.__code__.co_filename + assert any(k[0] == file for k in p.line_data) +