Skip to content

Commit 4807a2c

Browse files
mergify[bot]Julien Danjou
and
Julien Danjou
authored
refactor(profiling): type pprof exporter and fix type comparison (#2987) (#3062)
There was an ongoing issue wih the pprof encode mixing type between strings, ints and None when dealing with labels. This has been fixed in multiple places and in multiple occasion, but there were still holes. When encoding Events to pprof, they are grouped by event fields. Those fields might be str, int or something else. In order to group those events, all the fields used for grouping must be: 1. hashable (because we index in a dict) 2. sortable (because we sort event to have reproducible) 3. string (because we store those as labels in pprof) The problem is that most value type used in events are hashable (None, int or str) but sortability often fails when one of the field is None: you can't compare `int` with `None`, making sorted() call fails like in #2962. As we need string in the end, the fix is to convert everything to strings as soon as possible when grouping events: that makes sure conditions 1, 2 and 3 are validated ASAP and everything works. To do this, the code has been updated and typed to catch any future error. Fixes #2962 Co-authored-by: Brett Langdon <[email protected]> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> (cherry picked from commit 6520212) Co-authored-by: Julien Danjou <[email protected]>
1 parent be1380e commit 4807a2c

23 files changed

+525
-320
lines changed

ddtrace/profiling/_build.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
import typing
22

33

4-
compiled_with: typing.Tuple[int, int, int] = (PY_MAJOR_VERSION, PY_MINOR_VERSION, PY_MICRO_VERSION)
4+
compiled_with: typing.Tuple[int, int, int] = (PY_MAJOR_VERSION, PY_MINOR_VERSION, PY_MICRO_VERSION) # type: ignore[name-defined]
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import types
22
import typing
33

4-
# (filename, line number, function name)
5-
FrameType = typing.Tuple[str, int, str]
4+
from .. import event
65

76
def traceback_to_frames(
87
traceback: types.TracebackType, max_nframes: int
9-
) -> typing.Tuple[typing.List[FrameType], int]: ...
10-
def pyframe_to_frames(frame: types.FrameType, max_nframes: int) -> typing.Tuple[typing.List[FrameType], int]: ...
8+
) -> typing.Tuple[typing.List[event.FrameType], int]: ...
9+
def pyframe_to_frames(frame: types.FrameType, max_nframes: int) -> typing.Tuple[typing.List[event.FrameType], int]: ...

ddtrace/profiling/collector/memalloc.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,24 @@
2727
class MemoryAllocSampleEvent(event.StackBasedEvent):
2828
"""A sample storing memory allocation tracked."""
2929

30-
size = attr.ib(default=None)
30+
size = attr.ib(default=0, type=int)
3131
"""Allocation size in bytes."""
3232

33-
capture_pct = attr.ib(default=None)
33+
capture_pct = attr.ib(default=None, type=float)
3434
"""The capture percentage."""
3535

36-
nevents = attr.ib(default=None)
36+
nevents = attr.ib(default=0, type=int)
3737
"""The total number of allocation events sampled."""
3838

3939

4040
@event.event_class
4141
class MemoryHeapSampleEvent(event.StackBasedEvent):
4242
"""A sample storing memory allocation tracked."""
4343

44-
size = attr.ib(default=None)
44+
size = attr.ib(default=0, type=int)
4545
"""Allocation size in bytes."""
4646

47-
sample_size = attr.ib(default=None)
47+
sample_size = attr.ib(default=0, type=int)
4848
"""The sampling size."""
4949

5050

ddtrace/profiling/collector/stack.pyx

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ from ddtrace.profiling import event
2020
from ddtrace.profiling.collector import _task
2121
from ddtrace.profiling.collector import _threading
2222
from ddtrace.profiling.collector import _traceback
23+
from ddtrace.profiling.collector import stack_event
2324

2425

2526
# NOTE: Do not use LOG here. This code runs under a real OS thread and is unable to acquire any lock of the `logging`
@@ -138,22 +139,6 @@ ELSE:
138139
}
139140

140141

141-
@event.event_class
142-
class StackSampleEvent(event.StackBasedEvent):
143-
"""A sample storing executions frames for a thread."""
144-
145-
# Wall clock
146-
wall_time_ns = attr.ib(default=0)
147-
# CPU time in nanoseconds
148-
cpu_time_ns = attr.ib(default=0)
149-
150-
151-
@event.event_class
152-
class StackExceptionSampleEvent(event.StackBasedEvent):
153-
"""A a sample storing raised exceptions and their stack frames."""
154-
155-
exc_type = attr.ib(default=None)
156-
157142
from cpython.object cimport PyObject
158143

159144

@@ -319,7 +304,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
319304

320305
frames, nframes = _traceback.pyframe_to_frames(frame, max_nframes)
321306

322-
event = StackSampleEvent(
307+
event = stack_event.StackSampleEvent(
323308
thread_id=thread_id,
324309
thread_native_id=thread_native_id,
325310
thread_name=thread_name,
@@ -338,7 +323,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
338323
if exception is not None:
339324
exc_type, exc_traceback = exception
340325
frames, nframes = _traceback.traceback_to_frames(exc_traceback, max_nframes)
341-
exc_event = StackExceptionSampleEvent(
326+
exc_event = stack_event.StackExceptionSampleEvent(
342327
thread_id=thread_id,
343328
thread_name=thread_name,
344329
thread_native_id=thread_native_id,
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import typing
2+
3+
import attr
4+
5+
from ddtrace.profiling import event
6+
7+
8+
@event.event_class
9+
class StackSampleEvent(event.StackBasedEvent):
10+
"""A sample storing executions frames for a thread."""
11+
12+
# Wall clock
13+
wall_time_ns = attr.ib(default=0, type=int)
14+
# CPU time in nanoseconds
15+
cpu_time_ns = attr.ib(default=0, type=int)
16+
17+
18+
@event.event_class
19+
class StackExceptionSampleEvent(event.StackBasedEvent):
20+
"""A a sample storing raised exceptions and their stack frames."""
21+
22+
exc_type = attr.ib(default=None, type=typing.Optional[str])

ddtrace/profiling/collector/threading.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,22 @@
2323
class LockEventBase(event.StackBasedEvent):
2424
"""Base Lock event."""
2525

26-
lock_name = attr.ib(default=None, type=typing.Optional[str])
27-
sampling_pct = attr.ib(default=None, type=typing.Optional[int])
26+
lock_name = attr.ib(default="<unknown lock name>", type=str)
27+
sampling_pct = attr.ib(default=0, type=int)
2828

2929

3030
@event.event_class
3131
class LockAcquireEvent(LockEventBase):
3232
"""A lock has been acquired."""
3333

34-
wait_time_ns = attr.ib(default=None, type=typing.Optional[int])
34+
wait_time_ns = attr.ib(default=0, type=int)
3535

3636

3737
@event.event_class
3838
class LockReleaseEvent(LockEventBase):
3939
"""A lock has been released."""
4040

41-
locked_for_ns = attr.ib(default=None, type=typing.Optional[int])
41+
locked_for_ns = attr.ib(default=0, type=int)
4242

4343

4444
def _current_thread():

ddtrace/profiling/event.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88

99
_T = typing.TypeVar("_T")
1010

11+
# (filename, line number, function name)
12+
FrameType = typing.Tuple[str, int, typing.Optional[str]]
13+
StackTraceType = typing.List[FrameType]
14+
1115

1216
def event_class(
13-
klass # type: typing.Type[_T]
17+
klass, # type: typing.Type[_T]
1418
):
1519
# type: (...) -> typing.Type[_T]
1620
return attr.s(slots=True)(klass)
@@ -45,23 +49,24 @@ class SampleEvent(Event):
4549

4650
@event_class
4751
class StackBasedEvent(SampleEvent):
48-
thread_id = attr.ib(default=None)
49-
thread_name = attr.ib(default=None)
50-
thread_native_id = attr.ib(default=None)
51-
task_id = attr.ib(default=None)
52-
task_name = attr.ib(default=None)
53-
frames = attr.ib(default=None)
54-
nframes = attr.ib(default=None)
52+
thread_id = attr.ib(default=None, type=typing.Optional[int])
53+
thread_name = attr.ib(default=None, type=typing.Optional[str])
54+
thread_native_id = attr.ib(default=None, type=typing.Optional[int])
55+
task_id = attr.ib(default=None, type=typing.Optional[int])
56+
task_name = attr.ib(default=None, type=typing.Optional[str])
57+
frames = attr.ib(default=None, type=StackTraceType)
58+
nframes = attr.ib(default=0, type=int)
5559
local_root_span_id = attr.ib(default=None, type=typing.Optional[int])
5660
span_id = attr.ib(default=None, type=typing.Optional[int])
5761
trace_type = attr.ib(default=None, type=typing.Optional[str])
58-
trace_resource_container = attr.ib(default=None)
62+
trace_resource_container = attr.ib(default=None, type=typing.List[str])
5963

6064
def set_trace_info(
6165
self,
6266
span, # type: typing.Optional[ddspan.Span]
6367
endpoint_collection_enabled, # type: bool
6468
):
69+
# type: (...) -> None
6570
if span:
6671
self.span_id = span.span_id
6772
if span._local_root is not None:

ddtrace/profiling/exporter/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import typing
2+
13
import attr
24

35

@@ -10,7 +12,7 @@ class Exporter(object):
1012
"""Exporter base class."""
1113

1214
def export(self, events, start_time_ns, end_time_ns):
13-
# type: (...) -> None
15+
# type: (...) -> typing.Any
1416
"""Export events.
1517
1618
:param events: List of events to export.

ddtrace/profiling/exporter/file.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,23 @@
55

66
from ddtrace.profiling.exporter import pprof
77

8+
from .. import recorder
9+
810

911
@attr.s
1012
class PprofFileExporter(pprof.PprofExporter):
1113
"""PProf file exporter."""
1214

13-
prefix = attr.ib()
14-
_increment = attr.ib(default=1, init=False, repr=False)
15+
prefix = attr.ib(type=str)
16+
_increment = attr.ib(default=1, init=False, repr=False, type=int)
1517

16-
def export(self, events, start_time_ns, end_time_ns):
17-
# type: (...) -> None
18+
def export(
19+
self,
20+
events, # type: recorder.EventsType
21+
start_time_ns, # type: int
22+
end_time_ns, # type: int
23+
):
24+
# type: (...) -> pprof.pprof_ProfileType
1825
"""Export events to pprof file.
1926
2027
The file name is based on the prefix passed to init. The process ID number and type of export is then added as a
@@ -26,5 +33,6 @@ def export(self, events, start_time_ns, end_time_ns):
2633
"""
2734
profile = super(PprofFileExporter, self).export(events, start_time_ns, end_time_ns)
2835
with gzip.open(self.prefix + (".%d.%d" % (os.getpid(), self._increment)), "wb") as f:
29-
f.write(profile.SerializeToString())
36+
f.write(profile.SerializeToString()) # type: ignore[attr-defined]
3037
self._increment += 1
38+
return profile

0 commit comments

Comments
 (0)