Skip to content

Commit 01602ae

Browse files
Olshanskmiss-islington
authored andcommitted
bpo-37958: Adding get_profile_dict to pstats (GH-15495)
pstats is really useful for profiling and printing the output of the execution of some block of code, but on multiple occasions I've wanted to access this output directly as an easily usable dictionary that I can further analyze or manipulate. The proposal is to add a function called get_profile_dict inside of pstats that'll automatically return this data in an easily accessible dict. The output of the following script: ``` import cProfile, pstats import pprint from pstats import func_std_string, f8 def fib(n): if n == 0: return 0 if n == 1: return 1 return fib(n-1) + fib(n-2) pr = cProfile.Profile() pr.enable() fib(5) pr.create_stats() ps = pstats.Stats(pr).sort_stats('tottime', 'cumtime') def get_profile_dict(self, keys_filter=None): """ Returns a dict where the key is a function name and the value is a dict with the following keys: - ncalls - tottime - percall_tottime - cumtime - percall_cumtime - file_name - line_number keys_filter can be optionally set to limit the key-value pairs in the retrieved dict. 
""" pstats_dict = {} func_list = self.fcn_list[:] if self.fcn_list else list(self.stats.keys()) if not func_list: return pstats_dict pstats_dict["total_tt"] = float(f8(self.total_tt)) for func in func_list: cc, nc, tt, ct, callers = self.stats[func] file, line, func_name = func ncalls = str(nc) if nc == cc else (str(nc) + '/' + str(cc)) tottime = float(f8(tt)) percall_tottime = -1 if nc == 0 else float(f8(tt/nc)) cumtime = float(f8(ct)) percall_cumtime = -1 if cc == 0 else float(f8(ct/cc)) func_dict = { "ncalls": ncalls, "tottime": tottime, # time spent in this function alone "percall_tottime": percall_tottime, "cumtime": cumtime, # time spent in the function plus all functions that this function called, "percall_cumtime": percall_cumtime, "file_name": file, "line_number": line } func_dict_filtered = func_dict if not keys_filter else { key: func_dict[key] for key in keys_filter } pstats_dict[func_name] = func_dict_filtered return pstats_dict pp = pprint.PrettyPrinter(depth=6) pp.pprint(get_profile_dict(ps)) ``` will produce: ``` {"<method 'disable' of '_lsprof.Profiler' objects>": {'cumtime': 0.0, 'file_name': '~', 'line_number': 0, 'ncalls': '1', 'percall_cumtime': 0.0, 'percall_tottime': 0.0, 'tottime': 0.0}, 'create_stats': {'cumtime': 0.0, 'file_name': '/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/cProfile.py', 'line_number': 50, 'ncalls': '1', 'percall_cumtime': 0.0, 'percall_tottime': 0.0, 'tottime': 0.0}, 'fib': {'cumtime': 0.0, 'file_name': 'get_profile_dict.py', 'line_number': 5, 'ncalls': '15/1', 'percall_cumtime': 0.0, 'percall_tottime': 0.0, 'tottime': 0.0}, 'total_tt': 0.0} ``` As an example, this can be used to generate a stacked column chart using various visualization tools which will assist in easily identifying program bottlenecks. https://bugs.python.org/issue37958 Automerge-Triggered-By: @gpshead
1 parent dc0284e commit 01602ae

File tree

4 files changed

+90
-4
lines changed

4 files changed

+90
-4
lines changed

Doc/library/profile.rst

+11
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,17 @@ Analysis of the profiler data is done using the :class:`~pstats.Stats` class.
525525
ordering are identical to the :meth:`~pstats.Stats.print_callers` method.
526526

527527

528+
.. method:: get_stats_profile()
529+
530+
This method returns an instance of StatsProfile, which contains a mapping
531+
of function names to instances of FunctionProfile. Each FunctionProfile
532+
instance holds information related to the function's profile such as how
533+
long the function took to run, how many times it was called, etc...
534+
535+
.. versionadded:: 3.9
536+
Added the following dataclasses: StatsProfile, FunctionProfile.
537+
Added the following function: get_stats_profile.
538+
528539
.. _deterministic-profiling:
529540

530541
What Is Deterministic Profiling?

Lib/pstats.py

+55-2
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
import time
2626
import marshal
2727
import re
28+
2829
from enum import Enum
2930
from functools import cmp_to_key
31+
from dataclasses import dataclass
32+
from typing import Dict
3033

31-
__all__ = ["Stats", "SortKey"]
32-
34+
__all__ = ["Stats", "SortKey", "FunctionProfile", "StatsProfile"]
3335

3436
class SortKey(str, Enum):
3537
CALLS = 'calls', 'ncalls'
@@ -52,6 +54,22 @@ def __new__(cls, *values):
5254
return obj
5355

5456

57+
@dataclass(unsafe_hash=True)
class FunctionProfile:
    """Profile figures for a single function.

    Instances are built by Stats.get_stats_profile(), one per profiled
    function.

    Attributes:
        ncalls: call count rendered as text.  It is a single number when the
            two call counters kept by the profiler agree, otherwise two
            numbers separated by a slash (for example "15/1").  This is why
            the field is a string and not an integer.
        tottime: time spent in this function alone.
        percall_tottime: tottime divided by the call count, or -1 when that
            count is zero.
        cumtime: time spent in the function plus all functions that this
            function called.
        percall_cumtime: cumtime divided by the call count, or -1 when that
            count is zero.
        file_name: file the function was defined in.
        line_number: line on which the function is defined.
    """
    ncalls: str
    tottime: float
    percall_tottime: float
    cumtime: float
    percall_cumtime: float
    file_name: str
    line_number: int
66+
67+
@dataclass(unsafe_hash=True)
class StatsProfile:
    """Profile data for a whole run, as returned by Stats.get_stats_profile().

    Attributes:
        total_tt: total time recorded by the Stats object the profile was
            built from (0 when no functions were profiled).
        func_profiles: mapping of function name to the FunctionProfile
            holding that function's figures.
    """
    # NOTE: unsafe_hash=True generates a __hash__ that hashes every field, so
    # hash() on an instance raises TypeError because func_profiles is a dict.
    total_tt: float
    func_profiles: Dict[str, FunctionProfile]
72+
5573
class Stats:
5674
"""This class is used for creating reports from data generated by the
5775
Profile class. It is a "friend" of that class, and imports data either
@@ -333,6 +351,41 @@ def eval_print_amount(self, sel, list, msg):
333351

334352
return new_list, msg
335353

354+
def get_stats_profile(self):
    """Return the collected statistics as a StatsProfile instance.

    The result maps each function name to a FunctionProfile holding that
    function's call count and timing figures, alongside the overall total
    time.  Per-call values are reported as -1 when the matching call count
    is zero.  Entries are keyed by the bare function name, so two functions
    with the same name overwrite one another (the last one processed wins).
    """
    # fcn_list, when populated, takes precedence over the raw stats keys.
    if self.fcn_list:
        selected = self.fcn_list[:]
    else:
        selected = list(self.stats.keys())

    if not selected:
        return StatsProfile(0, {})

    overall_time = float(f8(self.total_tt))
    profiles_by_name = {}
    result = StatsProfile(overall_time, profiles_by_name)

    for key in selected:
        primitive_calls, total_calls, own_time, cum_time, _callers = self.stats[key]
        source_file, source_line, name = key

        # A single number when both counters agree, "total/primitive" otherwise.
        if total_calls == primitive_calls:
            calls_text = str(total_calls)
        else:
            calls_text = str(total_calls) + '/' + str(primitive_calls)

        if total_calls == 0:
            own_per_call = -1
        else:
            own_per_call = float(f8(own_time / total_calls))

        if primitive_calls == 0:
            cum_per_call = -1
        else:
            cum_per_call = float(f8(cum_time / primitive_calls))

        profiles_by_name[name] = FunctionProfile(
            ncalls=calls_text,
            # time spent in this function alone
            tottime=float(f8(own_time)),
            percall_tottime=own_per_call,
            # time spent in the function plus everything it called
            cumtime=float(f8(cum_time)),
            percall_cumtime=cum_per_call,
            file_name=source_file,
            line_number=source_line,
        )

    return result
388+
336389
def get_print_list(self, sel_list):
337390
width = self.max_name_len
338391
if self.fcn_list:

Lib/test/test_pstats.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import unittest
2+
23
from test import support
34
from io import StringIO
4-
import pstats
55
from pstats import SortKey
66

7-
7+
import pstats
8+
import time
9+
import cProfile
810

911
class AddCallersTestCase(unittest.TestCase):
1012
"""Tests for pstats.add_callers helper."""
@@ -75,6 +77,24 @@ def test_sort_starts_mix(self):
7577
SortKey.TIME,
7678
'calls')
7779

80+
def test_get_stats_profile(self):
    """Every profiled function shows up by name in the StatsProfile."""
    def pass1(): pass
    def pass2(): pass
    def pass3(): pass

    profiler = cProfile.Profile()
    profiler.enable()
    pass1()
    pass2()
    pass3()
    profiler.create_stats()

    profile = pstats.Stats(profiler).get_stats_profile()
    recorded_names = set(profile.func_profiles.keys())
    for expected_name in ('pass1', 'pass2', 'pass3'):
        self.assertIn(expected_name, recorded_names)
7898

7999
if __name__ == "__main__":
80100
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Added the pstats.Stats.get_stats_profile() method to return the profile
2+
data as a StatsProfile instance.

0 commit comments

Comments
 (0)