diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 08aef92f..075db2a2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -53,6 +53,7 @@ Changes * ``kernprof`` and ``python -m line_profiler`` CLI options * ``GlobalProfiler`` configurations, and * profiler output (e.g. ``LineProfiler.print_stats()``) formatting +* ENH: Added capability to combine profiling data both programmatically (``LineStats.__add__()``) and via the CLI (``python -m line_profiler``) (#380, originally proposed in #219) 4.2.0 ~~~~~ diff --git a/line_profiler/__init__.py b/line_profiler/__init__.py index 8c82dd2e..d7495ca5 100644 --- a/line_profiler/__init__.py +++ b/line_profiler/__init__.py @@ -251,7 +251,7 @@ def main(): # NOTE: This needs to be in sync with ../kernprof.py and line_profiler.py __version__ = '5.0.1' -from .line_profiler import (LineProfiler, +from .line_profiler import (LineProfiler, LineStats, load_ipython_extension, load_stats, main, show_func, show_text,) @@ -259,6 +259,6 @@ def main(): from .explicit_profiler import profile -__all__ = ['LineProfiler', 'line_profiler', +__all__ = ['LineProfiler', 'LineStats', 'line_profiler', 'load_ipython_extension', 'load_stats', 'main', 'show_func', 'show_text', '__version__', 'profile'] diff --git a/line_profiler/line_profiler.py b/line_profiler/line_profiler.py index a75c8623..4e3a2c01 100755 --- a/line_profiler/line_profiler.py +++ b/line_profiler/line_profiler.py @@ -7,6 +7,7 @@ import functools import inspect import linecache +import operator import os import pickle import sys @@ -17,7 +18,8 @@ from datetime import datetime try: - from ._line_profiler import LineProfiler as CLineProfiler + from ._line_profiler import (LineProfiler as CLineProfiler, + LineStats as CLineStats) except ImportError as ex: raise ImportError( 'The line_profiler._line_profiler c-extension is not importable. 
class LineStats(CLineStats):
    """
    Line-profiling results with support for comparison, aggregation,
    and (de-)serialization.

    Attributes:
        timings (dict):
            Maps a 3-tuple key identifying a profiled function to a
            list of ``(lineno, nhits, time)`` entries, where ``time``
            is expressed in multiples of :py:attr:`unit`.
        unit (float):
            Timer unit the ``time`` entries are expressed in.
    """

    def __repr__(self):
        return '{}({}, {:.2G})'.format(
            type(self).__name__, self.timings, self.unit)

    def __eq__(self, other):
        """
        Two stats objects are equal if both their ``.timings`` and
        their ``.unit`` are; objects lacking either attribute are not
        comparable (:py:data:`NotImplemented` is returned).

        Example:
            >>> from copy import deepcopy
            >>>
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = deepcopy(stats1)
            >>> assert stats1 == stats2 is not stats1
            >>> stats2.unit = 1E-7
            >>> assert stats2 != stats1
            >>> stats3 = deepcopy(stats1)
            >>> assert stats1 == stats3 is not stats1
            >>> stats3.timings['foo', 1, 'spam.py'][:] = [(2, 11, 330)]
            >>> assert stats3 != stats1
        """
        for attr in 'timings', 'unit':
            try:
                if getattr(self, attr) != getattr(other, attr):
                    return False
            except (AttributeError, TypeError):
                return NotImplemented
        return True

    def __add__(self, other):
        """
        Return a new instance holding the combined data of ``self`` and
        ``other``; the larger of the two units is used for the result.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_sum = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 12, 3000), (12, 6, 600)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 500)]},
            ...     1E-6)
            >>> assert stats1 + stats2 == stats2 + stats1 == stats_sum
            >>> assert stats1.__add__(None) is NotImplemented
        """
        # Let Python raise the usual `TypeError` (or try the reflected
        # operation) instead of leaking an `AttributeError`
        if not self._is_stats_like(other):
            return NotImplemented
        timings, unit = self._get_aggregated_timings([self, other])
        return type(self)(timings, unit)

    def __iadd__(self, other):
        """
        Merge the data of ``other`` into ``self`` in place.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_sum = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 12, 3000), (12, 6, 600)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 500)]},
            ...     1E-6)
            >>> address = id(stats2)
            >>> stats2 += stats1
            >>> assert id(stats2) == address
            >>> assert stats2 == stats_sum
        """
        if not self._is_stats_like(other):
            return NotImplemented
        self.timings, self.unit = self._get_aggregated_timings([self, other])
        return self

    def print(self, stream=None, **kwargs):
        """
        Write the formatted stats to ``stream``; ``**kwargs`` are
        passed through to :py:func:`show_text`.
        """
        show_text(self.timings, self.unit, stream=stream, **kwargs)

    def to_file(self, filename):
        """ Pickle the instance to the given filename.
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    @classmethod
    def from_files(cls, file, /, *files):
        """
        Utility function to load an instance from the given filenames.
        If more than one file is given, their data are combined.

        Warning:
            The files are read with :py:func:`pickle.load`, which can
            execute arbitrary code; only load files from trusted
            sources.
        """
        stats_objs = []
        # Separate loop variable so that the parameter isn't rebound
        for fname in [file, *files]:
            with open(fname, 'rb') as f:
                stats_objs.append(pickle.load(f))
        return cls.from_stats_objects(*stats_objs)

    @classmethod
    def from_stats_objects(cls, stats, /, *more_stats):
        """
        Create an instance from one or more objects having the
        ``.timings`` and ``.unit`` attributes, combining their data.

        Example:
            >>> stats1 = LineStats(
            ...     {('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...      ('bar', 10, 'spam.py'):
            ...      [(11, 2, 1000), (12, 1, 500)]},
            ...     1E-6)
            >>> stats2 = LineStats(
            ...     {('bar', 10, 'spam.py'):
            ...      [(11, 10, 20000), (12, 5, 1000)],
            ...      ('baz', 5, 'eggs.py'): [(5, 2, 5000)]},
            ...     1E-7)
            >>> stats_combined = LineStats.from_stats_objects(
            ...     stats1, stats2)
            >>> assert stats_combined.unit == 1E-6
            >>> assert stats_combined.timings == {
            ...     ('foo', 1, 'spam.py'): [(2, 10, 300)],
            ...     ('bar', 10, 'spam.py'):
            ...     [(11, 12, 3000), (12, 6, 600)],
            ...     ('baz', 5, 'eggs.py'): [(5, 2, 500)]}
        """
        timings, unit = cls._get_aggregated_timings([stats, *more_stats])
        return cls(timings, unit)

    @staticmethod
    def _is_stats_like(obj):
        """ Whether ``obj`` has the attributes needed for aggregation.
        """
        return hasattr(obj, 'timings') and hasattr(obj, 'unit')

    @staticmethod
    def _get_aggregated_timings(stats_objs):
        """
        Combine the data of the stats objects.

        Args:
            stats_objs (list):
                Non-empty list of objects having the ``.timings`` and
                ``.unit`` attributes.

        Returns:
            tuple: ``(timings, unit)``; ``timings`` is a new dictionary
            holding new lists, and ``unit`` is the largest unit among
            ``stats_objs``.

        Raises:
            ValueError: If ``stats_objs`` is empty.
        """
        if not stats_objs:
            raise ValueError(f'stats_objs = {stats_objs!r}: empty')
        if len(stats_objs) == 1:
            # Nothing to aggregate; copy the lists so that the result
            # doesn't share them with the input
            stats, = stats_objs
            timings = {key: entries.copy()
                       for key, entries in stats.timings.items()}
            return timings, stats.unit
        # Add from small scaling factors to large to minimize
        # rounding errors
        stats_objs = sorted(stats_objs, key=operator.attrgetter('unit'))
        unit = stats_objs[-1].unit
        # Layout: {key: {lineno: (nhits, time)}}, where `time` is a
        # float in multiples of `unit` until rounded below
        timing_dict = {}
        for stats in stats_objs:
            factor = stats.unit / unit
            for key, entries in stats.timings.items():
                entry_dict = timing_dict.setdefault(key, {})
                for lineno, nhits, time in entries:
                    prev_nhits, prev_time = entry_dict.get(lineno, (0, 0))
                    entry_dict[lineno] = (prev_nhits + nhits,
                                          prev_time + factor * time)
        timings = {
            key: [(lineno, nhits, int(round(time, 0)))
                  for lineno, (nhits, time) in sorted(entry_dict.items())]
            for key, entry_dict in timing_dict.items()}
        return timings, unit
def load_stats(filename, *filenames):
    """
    Utility function to load pickled :py:class:`~.LineStats` data
    from the given filename(s).

    This is a thin wrapper around :py:meth:`LineStats.from_files`.
    It is kept as a real function (instead of an alias) so that the
    historical call form ``load_stats(filename=...)`` keeps working:
    the first parameter of :py:meth:`LineStats.from_files` is
    positional-only.

    Args:
        filename: Path of the (first) file to load.
        *filenames: Further files whose data are to be combined with
            that of ``filename``.

    Returns:
        LineStats: The loaded (and combined) profiling data.

    Warning:
        The files are unpickled; only load files from trusted sources.
    """
    return LineStats.from_files(filename, *filenames)
# Structural type of the objects `LineStats` can aggregate: anything
# having the `.timings` and `.unit` attributes (e.g. the C-level stats
# object) qualifies
class _StatsLike(Protocol):
    timings: Mapping[tuple[str, int, str],  # funcname, lineno, filename
                     list[tuple[int, int, int]]]  # lineno, nhits, time
    unit: float


class LineStats(CLineStats):
    # Pickle the instance to `filename`
    def to_file(self, filename: PathLike[str] | str) -> None:
        ...

    # Write the formatted stats to `stream`; `**kwargs` are passed on
    # to `show_text()`
    def print(self, stream: Incomplete | None = None, **kwargs) -> None:
        ...

    # Load and combine the pickled stats in the given file(s)
    @classmethod
    def from_files(cls, file: PathLike[str] | str, /,
                   *files: PathLike[str] | str) -> Self:
        ...

    # Combine the given stats object(s) into a new instance
    @classmethod
    def from_stats_objects(cls, stats: _StatsLike, /,
                           *more_stats: _StatsLike) -> Self:
        ...

    def __repr__(self) -> str:
        ...

    # `other` can be any object, as with `object.__eq__()`
    def __eq__(self, other: object) -> bool:
        ...

    def __add__(self, other: _StatsLike) -> Self:
        ...

    def __iadd__(self, other: _StatsLike) -> Self:
        ...
def test_multiple_lprof_files(capsys):
    """
    Test that we can aggregate profiling results with
    ``python -m line_profiler``.

    Three profilers write one ``.lprof`` file each; the files are then
    passed together to the CLI, and the hit counts in the output are
    checked against the totals over all the files.
    """
    # Note: the loop variables are deliberately distinct from the
    # parameter `n` so that the helpers read unambiguously
    def sum_n(n: int) -> int:
        x = 0
        for i in range(1, n + 1):
            x += i  # Loop: sum_n
        return x  # Return: sum_n

    def sum_nsq(n: int) -> int:
        x = 0
        for i in range(1, n + 1):
            x += i * i  # Loop: sum_nsq
        return x  # Return: sum_nsq

    profs = {0: LineProfiler(sum_n),
             1: LineProfiler(sum_nsq),
             2: LineProfiler(sum_n, sum_nsq)}

    with TemporaryDirectory() as tmp_dpath:
        # Write several profiling output files
        stats_files = []
        nhits = {}  # func -> (hits on the loop line, hits on `return`)
        for i, (func, n, expected) in enumerate([
                (sum_n, 10, 10 * 11 // 2),
                (sum_nsq, 20, 20 * 21 * 41 // 6),
                (sum_n, 30, 30 * 31 // 2)]):
            prof = profs[i]
            with prof:
                assert func(n) == expected
            stats = join(tmp_dpath, f'{i}.lprof')
            stats_files.append(stats)
            prev_loop, prev_return = nhits.get(func, (0, 0))
            nhits[func] = prev_loop + n, prev_return + 1
            prof.dump_stats(stats)

        # Run the CLI in-process so that `capsys` sees its output;
        # `sys.argv` is restored even if the module errors out
        old_argv = argv.copy()
        argv[:] = ['line_profiler', *stats_files]
        try:
            run_module('line_profiler', run_name='__main__', alter_sys=True)
        finally:
            argv[:] = old_argv

    # View them and check the output
    checks = {}
    out, err = capsys.readouterr()
    with capsys.disabled():
        print(out, end='')
        print(err, end='', file=stderr)
    for func in sum_n, sum_nsq:
        for comment, n in zip(['Loop', 'Return'], nhits[func]):
            checks[f' # {comment}: {func.__name__}'] = n
    for line in out.splitlines():
        try:
            suffix, = (suffix for suffix in checks if line.endswith(suffix))
        except ValueError:  # No match
            continue
        assert int(line.split()[1]) == checks.pop(suffix)
    # Every expected line must have shown up, lest the test passes
    # vacuously when the output is empty or incomplete
    assert not checks, f'lines missing from the output: {sorted(checks)}'
@pytest.mark.parametrize('n', [1, 2])
@pytest.mark.parametrize('legacy', [True, False])
def test_load_stats_files(legacy, n):
    """
    Test the loading of one (``n = 1``) or two (``n = 2``) stats files.

    If ``legacy`` is true, the tempfiles are pickled from instances of
    the base class of :py:class:`line_profiler.line_profiler.LineStats`
    instead of from the class itself, to ensure that ``'.lprof'``
    files written by old versions of :py:mod:`line_profiler` are still
    properly handled.
    """
    def dump(stats_obj, path):
        # Write `stats_obj` to `path` (old- or new-style), return `path`
        if not legacy:
            stats_obj.to_file(path)
            return path
        base_cls = type(stats_obj).__base__
        old_style = base_cls(stats_obj.timings, stats_obj.unit)
        assert not isinstance(old_style, LineStats)
        with open(path, mode='wb') as fobj:
            pickle.dump(old_style, fobj)
        return path

    stats1 = LineStats({('foo', 1, 'spam.py'): [(2, 3, 3600)]}, .015625)
    stats2 = LineStats({('foo', 1, 'spam.py'): [(2, 4, 700)],
                        ('bar', 10, 'spam.py'): [(10, 20, 1000)]},
                       .0625)
    with TemporaryDirectory() as tmpdir:
        files = [dump(stats1, os.path.join(tmpdir, '1.lprof'))]
        expected = stats1
        if n != 1:
            files.append(dump(stats2, os.path.join(tmpdir, '2.lprof')))
            expected = stats1 + stats2
        stats_read = LineStats.from_files(*files)
        assert isinstance(stats_read, LineStats)
        assert stats_read == expected