Skip to content

Commit 3e09ff4

Browse files
cold start tracing (python) (#299)
* (wip) coldstart tracing POC checkin * feat: rename cold start span so it is nested properly. * simplify how to wrap methods with push_node and pop_node and bug fixes * remove cold_start_tracing file and use cold_start file * add test_cold_start * fix lint and integration tests * make sure loader is Hashable in order to use in a set * add logging, add default skip libs --------- Co-authored-by: AJ Stuyvenberg <[email protected]>
1 parent 5286cb8 commit 3e09ff4

File tree

5 files changed

+450
-5
lines changed

5 files changed

+450
-5
lines changed

datadog_lambda/__init__.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
import os
2+
import logging
3+
from datadog_lambda.cold_start import initialize_cold_start_tracing
4+
5+
initialize_cold_start_tracing()
6+
17
# The minor version corresponds to the Lambda layer version.
28
# E.g.,, version 0.5.0 gets packaged into layer version 5.
39
try:
@@ -7,8 +13,5 @@
713

814
__version__ = importlib_metadata.version(__name__)
915

10-
import os
11-
import logging
12-
1316
logger = logging.getLogger(__name__)
1417
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))

datadog_lambda/cold_start.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
import time
2+
import os
3+
from typing import List, Hashable
4+
import logging
5+
6+
logger = logging.getLogger(__name__)
7+
18
_cold_start = True
29
_lambda_container_initialized = False
310

@@ -21,3 +28,196 @@ def is_cold_start():
2128
def get_cold_start_tag():
2229
"""Returns the cold start tag to be used in metrics"""
2330
return "cold_start:{}".format(str(is_cold_start()).lower())
31+
32+
33+
class ImportNode(object):
34+
def __init__(self, module_name, full_file_path, start_time_ns, end_time_ns=None):
35+
self.module_name = module_name
36+
self.full_file_path = full_file_path
37+
self.start_time_ns = start_time_ns
38+
self.end_time_ns = end_time_ns
39+
self.children = []
40+
41+
42+
root_nodes: List[ImportNode] = []
43+
import_stack: List[ImportNode] = []
44+
already_wrapped_loaders = set()
45+
46+
47+
def reset_node_stacks():
48+
global root_nodes
49+
root_nodes = []
50+
global import_stack
51+
import_stack = []
52+
53+
54+
def push_node(module_name, file_path):
55+
node = ImportNode(module_name, file_path, time.time_ns())
56+
global import_stack
57+
if import_stack:
58+
import_stack[-1].children.append(node)
59+
import_stack.append(node)
60+
61+
62+
def pop_node(module_name):
63+
global import_stack
64+
if not import_stack:
65+
return
66+
node = import_stack.pop()
67+
if node.module_name != module_name:
68+
return
69+
end_time_ns = time.time_ns()
70+
node.end_time_ns = end_time_ns
71+
if not import_stack: # import_stack empty, a root node has been found
72+
global root_nodes
73+
root_nodes.append(node)
74+
75+
76+
def wrap_exec_module(original_exec_module):
77+
def wrapped_method(module):
78+
should_pop = False
79+
try:
80+
spec = module.__spec__
81+
push_node(spec.name, spec.origin)
82+
should_pop = True
83+
except Exception:
84+
pass
85+
try:
86+
return original_exec_module(module)
87+
finally:
88+
if should_pop:
89+
pop_node(spec.name)
90+
91+
return wrapped_method
92+
93+
94+
def wrap_find_spec(original_find_spec):
95+
def wrapped_find_spec(*args, **kwargs):
96+
spec = original_find_spec(*args, **kwargs)
97+
if spec is None:
98+
return None
99+
loader = getattr(spec, "loader", None)
100+
if (
101+
loader is not None
102+
and isinstance(loader, Hashable)
103+
and loader not in already_wrapped_loaders
104+
):
105+
if hasattr(loader, "exec_module"):
106+
try:
107+
loader.exec_module = wrap_exec_module(loader.exec_module)
108+
already_wrapped_loaders.add(loader)
109+
except Exception as e:
110+
logger.debug("Failed to wrap the loader. %s", e)
111+
return spec
112+
113+
return wrapped_find_spec
114+
115+
116+
def initialize_cold_start_tracing():
117+
if (
118+
is_cold_start()
119+
and os.environ.get("DD_TRACE_ENABLED", "true").lower() == "true"
120+
and os.environ.get("DD_COLD_START_TRACING", "true").lower() == "true"
121+
):
122+
from sys import version_info, meta_path
123+
124+
if version_info >= (3, 7): # current implementation only support version > 3.7
125+
for importer in meta_path:
126+
try:
127+
importer.find_spec = wrap_find_spec(importer.find_spec)
128+
except Exception:
129+
pass
130+
131+
132+
class ColdStartTracer(object):
133+
def __init__(
134+
self,
135+
tracer,
136+
function_name,
137+
cold_start_span_finish_time_ns,
138+
trace_ctx,
139+
min_duration_ms: int,
140+
ignored_libs: List[str] = [],
141+
):
142+
self._tracer = tracer
143+
self.function_name = function_name
144+
self.cold_start_span_finish_time_ns = cold_start_span_finish_time_ns
145+
self.min_duration_ms = min_duration_ms
146+
self.trace_ctx = trace_ctx
147+
self.ignored_libs = ignored_libs
148+
self.need_to_reactivate_context = True
149+
150+
def trace(self, root_nodes: List[ImportNode] = root_nodes):
151+
if not root_nodes:
152+
return
153+
cold_start_span_start_time_ns = root_nodes[0].start_time_ns
154+
cold_start_span = self.create_cold_start_span(cold_start_span_start_time_ns)
155+
while root_nodes:
156+
root_node = root_nodes.pop()
157+
self.trace_tree(root_node, cold_start_span)
158+
self.finish_span(cold_start_span, self.cold_start_span_finish_time_ns)
159+
160+
def trace_tree(self, import_node: ImportNode, parent_span):
161+
if (
162+
import_node.end_time_ns - import_node.start_time_ns
163+
< self.min_duration_ms * 1e6
164+
or import_node.module_name in self.ignored_libs
165+
):
166+
return
167+
168+
span = self.start_span(
169+
"aws.lambda.import", import_node.module_name, import_node.start_time_ns
170+
)
171+
tags = {
172+
"resource_names": import_node.module_name,
173+
"resource.name": import_node.module_name,
174+
"filename": import_node.full_file_path,
175+
"operation_name": self.get_operation_name(import_node.full_file_path),
176+
}
177+
span.set_tags(tags)
178+
if parent_span:
179+
span.parent_id = parent_span.span_id
180+
for child_node in import_node.children:
181+
self.trace_tree(child_node, span)
182+
self.finish_span(span, import_node.end_time_ns)
183+
184+
def create_cold_start_span(self, start_time_ns):
185+
span = self.start_span("aws.lambda.load", self.function_name, start_time_ns)
186+
tags = {
187+
"resource_names": self.function_name,
188+
"resource.name": self.function_name,
189+
"operation_name": "aws.lambda.load",
190+
}
191+
span.set_tags(tags)
192+
return span
193+
194+
def start_span(self, span_type, resource, start_time_ns):
195+
if self.need_to_reactivate_context:
196+
self._tracer.context_provider.activate(
197+
self.trace_ctx
198+
) # reactivate required after each finish() call
199+
self.need_to_reactivate_context = False
200+
span_kwargs = {
201+
"service": "aws.lambda",
202+
"resource": resource,
203+
"span_type": span_type,
204+
}
205+
span = self._tracer.trace(span_type, **span_kwargs)
206+
span.start_ns = start_time_ns
207+
return span
208+
209+
def finish_span(self, span, finish_time_ns):
210+
span.finish(finish_time_ns / 1e9)
211+
self.need_to_reactivate_context = True
212+
213+
def get_operation_name(self, filename: str):
214+
if filename is None:
215+
return "aws.lambda.import_core_module"
216+
if not isinstance(filename, str):
217+
return "aws.lambda.import"
218+
if filename.startswith("/opt/"):
219+
return "aws.lambda.import_layer"
220+
elif filename.startswith("/var/lang/"):
221+
return "aws.lambda.import_runtime"
222+
else:
223+
return "aws.lambda.import"

datadog_lambda/wrapper.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# under the Apache License Version 2.0.
33
# This product includes software developed at Datadog (https://www.datadoghq.com/).
44
# Copyright 2019 Datadog, Inc.
5-
65
import base64
76
import os
87
import logging
@@ -12,7 +11,7 @@
1211
from time import time_ns
1312

1413
from datadog_lambda.extension import should_use_extension, flush_extension
15-
from datadog_lambda.cold_start import set_cold_start, is_cold_start
14+
from datadog_lambda.cold_start import set_cold_start, is_cold_start, ColdStartTracer
1615
from datadog_lambda.constants import (
1716
TraceContextSource,
1817
XraySubsegment,
@@ -38,6 +37,7 @@
3837
create_inferred_span,
3938
InferredSpanInfo,
4039
is_authorizer_response,
40+
tracer,
4141
)
4242
from datadog_lambda.trigger import (
4343
extract_trigger_tags,
@@ -131,6 +131,28 @@ def __init__(self, func):
131131
self.decode_authorizer_context = (
132132
os.environ.get("DD_DECODE_AUTHORIZER_CONTEXT", "true").lower() == "true"
133133
)
134+
self.cold_start_tracing = (
135+
os.environ.get("DD_COLD_START_TRACING", "true").lower() == "true"
136+
)
137+
self.min_cold_start_trace_duration = 3
138+
if "DD_MIN_COLD_START_DURATION" in os.environ:
139+
try:
140+
self.min_cold_start_trace_duration = int(
141+
os.environ["DD_MIN_COLD_START_DURATION"]
142+
)
143+
except Exception:
144+
logger.debug("Malformatted env DD_MIN_COLD_START_DURATION")
145+
self.cold_start_trace_skip_lib = [
146+
"ddtrace.internal.compat",
147+
"ddtrace.filters",
148+
]
149+
if "DD_COLD_START_TRACE_SKIP_LIB" in os.environ:
150+
try:
151+
self.cold_start_trace_skip_lib = os.environ[
152+
"DD_COLD_START_TRACE_SKIP_LIB"
153+
].split(",")
154+
except Exception:
155+
logger.debug("Malformatted for env DD_COLD_START_TRACE_SKIP_LIB")
134156
self.response = None
135157
if profiling_env_var:
136158
self.prof = profiler.Profiler(env=env_env_var, service=service_env_var)
@@ -257,6 +279,11 @@ def _after(self, event, context):
257279
create_dd_dummy_metadata_subsegment(
258280
self.trigger_tags, XraySubsegment.LAMBDA_FUNCTION_TAGS_KEY
259281
)
282+
should_trace_cold_start = (
283+
dd_tracing_enabled and self.cold_start_tracing and is_cold_start()
284+
)
285+
if should_trace_cold_start:
286+
trace_ctx = tracer.current_trace_context()
260287

261288
if self.span:
262289
if dd_capture_lambda_payload_enabled:
@@ -276,6 +303,20 @@ def _after(self, event, context):
276303
else:
277304
self.inferred_span.finish()
278305

306+
if should_trace_cold_start:
307+
try:
308+
following_span = self.span or self.inferred_span
309+
ColdStartTracer(
310+
tracer,
311+
self.function_name,
312+
following_span.start_ns,
313+
trace_ctx,
314+
self.min_cold_start_trace_duration,
315+
self.cold_start_trace_skip_lib,
316+
).trace()
317+
except Exception as e:
318+
logger.debug("Failed to create cold start spans. %s", e)
319+
279320
if not self.flush_to_log or should_use_extension:
280321
flush_stats()
281322
if should_use_extension:

tests/integration/serverless.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ provider:
1111
DD_TRACE_ENABLED: true
1212
DD_API_KEY: ${env:DD_API_KEY}
1313
DD_TRACE_MANAGED_SERVICES: true
14+
DD_COLD_START_TRACING: false
1415
timeout: 15
1516
deploymentBucket:
1617
name: integration-tests-serververless-deployment-bucket

0 commit comments

Comments
 (0)