Skip to content

Commit 94d1f7b

Browse files
hghotraagocsjtappa
authored
[SLS-1824] SNS trace extractor (#201)
* Extract trace context from sns payload * Fix extractor issue * Handle SNS message inside SQS event * Add extractor for Eventbridge context (#202) * Update expected breaking change date (#114) * change inferred_span to _inferred_span * Add extractor for eventbridge trace context * Add another test to test eventbridge extraction * Get tracing.Literal for pythons that don't have it already * Add _datadog to eventbridge extractor * Update integration tests * Remove init complete and main start logs Co-authored-by: Jorie Helwig <[email protected]> Co-authored-by: Harvinder Ghotra <[email protected]> * Remove duplicate entry * Kinesis extractor plus small fixes * Add snapshots * Add tests Co-authored-by: Christopher Agocs <[email protected]> Co-authored-by: Jorie Helwig <[email protected]>
1 parent ba58ecd commit 94d1f7b

29 files changed

+648
-159
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[![Slack](https://chat.datadoghq.com/badge.svg?bg=632CA6)](https://chat.datadoghq.com/)
77
[![License](https://img.shields.io/badge/license-Apache--2.0-blue)](https://github.com/DataDog/datadog-lambda-python/blob/main/LICENSE)
88

9-
Datadog Lambda Library for Python (3.6, 3.7, 3.8, and 3.9) enables enhanced Lambda metrics, distributed tracing, and custom metric submission from AWS Lambda functions.
9+
Datadog Lambda Library for Python (3.6, 3.7, 3.8, and 3.9) enables enhanced Lambda metrics, distributed tracing, and custom metric submission from AWS Lambda functions.
1010

1111
**IMPORTANT NOTE:** AWS Lambda is expected to receive a [breaking change](https://aws.amazon.com/blogs/compute/upcoming-changes-to-the-python-sdk-in-aws-lambda/) on **March 31, 2021**. If you are using Datadog Python Lambda layer version 7 or below, please upgrade to the latest.
1212

@@ -89,11 +89,12 @@ Set to `true` to merge the X-Ray trace and the Datadog trace, when using both th
8989

9090
### DD_INFERRED_SPANS (experimental)
9191

92-
Inferred Spans are spans that Datadog can create based on incoming event metadata.
92+
Inferred Spans are spans that Datadog can create based on incoming event metadata.
9393
Set `DD_INFERRED_SPANS` to `true` to infer spans based on Lambda events.
94-
Inferring upstream spans is only supported if you are using the [Datadog Lambda Extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/).
94+
Inferring upstream spans is only supported if you are using the [Datadog Lambda Extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/).
9595
Defaults to `false`.
9696
Infers spans for:
97+
9798
- API Gateway REST events
9899
- API Gateway websocket events
99100
- HTTP API events

datadog_lambda/tracing.py

Lines changed: 134 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import os
88
import json
9+
import base64
910
from datetime import datetime, timezone
1011
from typing import Optional, Dict
1112

@@ -184,23 +185,98 @@ def extract_context_from_http_event_or_context(event, lambda_context):
184185
return trace_id, parent_id, sampling_priority
185186

186187

187-
def extract_context_from_sqs_event_or_context(event, lambda_context):
188+
def create_sns_event(message):
    """
    Wrap a single SNS message in a minimal SNS-shaped Lambda event.

    The returned event has exactly one entry in "Records", and the given
    message is stored (not copied) under that entry's "Sns" key.
    """
    record = {
        "EventSource": "aws:sns",
        "EventVersion": "1.0",
        "Sns": message,
    }
    return {"Records": [record]}
198+
199+
200+
def extract_context_from_sqs_or_sns_event_or_context(event, lambda_context):
    """
    Extract Datadog trace context from the first record of an SQS or SNS event.

    The context is read from the JSON string stored under the "_datadog"
    message attribute ("stringValue" for SQS records, "Value" for SNS records).
    When the body of the first SQS record is itself an SNS notification
    (SNS => SQS), the attributes of that embedded notification are used.

    Falls back to the lambda context if no trace data can be extracted.
    """
    try:
        first_record = event["Records"][0]

        # SNS => SQS: the SNS notification arrives as a JSON string in the body
        if "body" in first_record:
            raw_body = first_record.get("body", {})
            try:
                parsed_body = json.loads(raw_body)
                is_notification = parsed_body.get("Type", "") == "Notification"
                if is_notification and "TopicArn" in parsed_body:
                    logger.debug("Found SNS message inside SQS event")
                    first_record = get_first_record(create_sns_event(parsed_body))
            except Exception:
                # The body is not an SNS notification; keep the plain SQS record
                first_record = event["Records"][0]

        sns_attributes = first_record.get("Sns", {}).get("MessageAttributes", {})
        msg_attributes = first_record.get("messageAttributes", sns_attributes)
        dd_payload = msg_attributes.get("_datadog", {})
        sns_style_value = dd_payload.get("Value", r"{}")
        dd_data = json.loads(dd_payload.get("stringValue", sns_style_value))

        return (
            dd_data.get(TraceHeader.TRACE_ID),
            dd_data.get(TraceHeader.PARENT_ID),
            dd_data.get(TraceHeader.SAMPLING_PRIORITY),
        )
    except Exception as e:
        logger.debug("The trace extractor returned with error %s", e)
        return extract_context_from_lambda_context(lambda_context)
236+
237+
238+
def extract_context_from_eventbridge_event(event, lambda_context):
    """
    Extract Datadog trace context from an EventBridge event's "detail" field.

    The context is read from the "_datadog" entry of event["detail"]. Falls
    back to the lambda context when that entry is missing or empty, or when
    anything goes wrong while reading it (for example, "detail" is absent).
    """
    try:
        dd_context = event["detail"].get("_datadog")
        if dd_context:
            return (
                dd_context.get(TraceHeader.TRACE_ID),
                dd_context.get(TraceHeader.PARENT_ID),
                dd_context.get(TraceHeader.SAMPLING_PRIORITY),
            )
        return extract_context_from_lambda_context(lambda_context)
    except Exception as e:
        logger.debug("The trace extractor returned with error %s", e)
        return extract_context_from_lambda_context(lambda_context)
255+
256+
257+
def extract_context_from_kinesis_event(event, lambda_context):
    """
    Extract Datadog trace context from a Kinesis record's base64-encoded data.

    The first record's "kinesis.data" string is base64-decoded, parsed as
    JSON, and the trace context is read from the "_datadog" key of the result.

    Returns a (trace_id, parent_id, sampling_priority) tuple. Falls back to the
    lambda context when the record carries no data, the payload has no
    "_datadog" entry, or decoding/parsing fails.
    """
    try:
        record = get_first_record(event)
        data = record.get("kinesis", {}).get("data")
        if not data:
            # Explicit fallback: previously this path only worked because the
            # unbound `dd_ctx` raised and the error was swallowed below.
            return extract_context_from_lambda_context(lambda_context)

        # JSON text is UTF-8; decoding as ASCII dropped the trace context for
        # any payload containing non-ASCII characters.
        data_str = base64.b64decode(data).decode("utf-8")
        dd_ctx = json.loads(data_str).get("_datadog")
        if not dd_ctx:
            return extract_context_from_lambda_context(lambda_context)

        trace_id = dd_ctx.get(TraceHeader.TRACE_ID)
        parent_id = dd_ctx.get(TraceHeader.PARENT_ID)
        sampling_priority = dd_ctx.get(TraceHeader.SAMPLING_PRIORITY)
        return trace_id, parent_id, sampling_priority
    except Exception as e:
        logger.debug("The trace extractor returned with error %s", e)
        return extract_context_from_lambda_context(lambda_context)
205281

206282

@@ -230,6 +306,7 @@ def extract_dd_trace_context(event, lambda_context, extractor=None):
230306
"""
231307
global dd_trace_context
232308
trace_context_source = None
309+
event_source = parse_event_source(event)
233310

234311
if extractor is not None:
235312
(
@@ -243,12 +320,24 @@ def extract_dd_trace_context(event, lambda_context, extractor=None):
243320
parent_id,
244321
sampling_priority,
245322
) = extract_context_from_http_event_or_context(event, lambda_context)
246-
elif "Records" in event:
323+
elif event_source.equals(EventTypes.SNS) or event_source.equals(EventTypes.SQS):
247324
(
248325
trace_id,
249326
parent_id,
250327
sampling_priority,
251-
) = extract_context_from_sqs_event_or_context(event, lambda_context)
328+
) = extract_context_from_sqs_or_sns_event_or_context(event, lambda_context)
329+
elif event_source.equals(EventTypes.EVENTBRIDGE):
330+
(
331+
trace_id,
332+
parent_id,
333+
sampling_priority,
334+
) = extract_context_from_eventbridge_event(event, lambda_context)
335+
elif event_source.equals(EventTypes.KINESIS):
336+
(
337+
trace_id,
338+
parent_id,
339+
sampling_priority,
340+
) = extract_context_from_kinesis_event(event, lambda_context)
252341
else:
253342
trace_id, parent_id, sampling_priority = extract_context_from_lambda_context(
254343
lambda_context
@@ -556,6 +645,8 @@ def create_inferred_span_from_http_api_event(event, context):
556645

557646

558647
def create_inferred_span_from_sqs_event(event, context):
648+
trace_ctx = tracer.current_trace_context()
649+
559650
event_record = get_first_record(event)
560651
event_source_arn = event_record["eventSourceARN"]
561652
queue_name = event_source_arn.split(":")[-1]
@@ -574,11 +665,37 @@ def create_inferred_span_from_sqs_event(event, context):
574665
"resource": queue_name,
575666
"span_type": "web",
576667
}
668+
start_time = int(request_time_epoch) / 1000
669+
670+
# logic to deal with SNS => SQS event
671+
sns_span = None
672+
if "body" in event_record:
673+
body_str = event_record.get("body", {})
674+
try:
675+
body = json.loads(body_str)
676+
if body.get("Type", "") == "Notification" and "TopicArn" in body:
677+
logger.debug("Found SNS message inside SQS event")
678+
sns_span = create_inferred_span_from_sns_event(
679+
create_sns_event(body), context
680+
)
681+
sns_span.finish(finish_time=start_time)
682+
except Exception as e:
683+
logger.debug(
684+
"Unable to create SNS span from SQS message, with error %s" % e
685+
)
686+
pass
687+
688+
# trace context needs to be set again as it is reset
689+
# when sns_span.finish executes
690+
tracer.context_provider.activate(trace_ctx)
577691
tracer.set_tags({"_dd.origin": "lambda"})
578692
span = tracer.trace("aws.sqs", **args)
579693
if span:
580694
span.set_tags(tags)
581-
span.start = int(request_time_epoch) / 1000
695+
span.start = start_time
696+
if sns_span:
697+
span.parent_id = sns_span.span_id
698+
582699
return span
583700

584701

@@ -594,9 +711,12 @@ def create_inferred_span_from_sns_event(event, context):
594711
"topic_arn": topic_arn,
595712
"message_id": sns_message["MessageId"],
596713
"type": sns_message["Type"],
597-
"subject": sns_message["Subject"],
598-
"event_subscription_arn": event_record["EventSubscriptionArn"],
599714
}
715+
716+
# Subject not available in SNS => SQS scenario
717+
if "Subject" in sns_message and sns_message["Subject"]:
718+
tags["subject"] = sns_message["Subject"]
719+
600720
InferredSpanInfo.set_tags(tags, tag_source="self", synchronicity="async")
601721
sns_dt_format = "%Y-%m-%dT%H:%M:%S.%fZ"
602722
timestamp = event_record["Sns"]["Timestamp"]
@@ -644,7 +764,7 @@ def create_inferred_span_from_kinesis_event(event, context):
644764
span = tracer.trace("aws.kinesis", **args)
645765
if span:
646766
span.set_tags(tags)
647-
span.start = int(request_time_epoch)
767+
span.start = request_time_epoch
648768
return span
649769

650770

@@ -662,7 +782,7 @@ def create_inferred_span_from_dynamodb_event(event, context):
662782
"event_name": event_record["eventName"],
663783
"event_version": event_record["eventVersion"],
664784
"stream_view_type": dynamodb_message["StreamViewType"],
665-
"size_bytes": dynamodb_message["SizeBytes"],
785+
"size_bytes": str(dynamodb_message["SizeBytes"]),
666786
}
667787
InferredSpanInfo.set_tags(tags, synchronicity="async", tag_source="self")
668788
request_time_epoch = event_record["dynamodb"]["ApproximateCreationDateTime"]
@@ -690,8 +810,8 @@ def create_inferred_span_from_s3_event(event, context):
690810
"bucketname": bucket_name,
691811
"bucket_arn": event_record["s3"]["bucket"]["arn"],
692812
"object_key": event_record["s3"]["object"]["key"],
693-
"object_size": event_record["s3"]["object"]["size"],
694-
"object_etag": event_record["s3"]["etag"],
813+
"object_size": str(event_record["s3"]["object"]["size"]),
814+
"object_etag": event_record["s3"]["object"]["eTag"],
695815
}
696816
InferredSpanInfo.set_tags(tags, synchronicity="async", tag_source="self")
697817
dt_format = "%Y-%m-%dT%H:%M:%S.%fZ"
@@ -786,7 +906,7 @@ def create_function_execution_span(
786906

787907

788908
class InferredSpanInfo(object):
789-
BASE_NAME = "inferred_span"
909+
BASE_NAME = "_inferred_span"
790910
SYNCHRONICITY = f"{BASE_NAME}.synchronicity"
791911
TAG_SOURCE = f"{BASE_NAME}.tag_source"
792912

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ datadog = "^0.41.0"
2828
wrapt = "^1.11.2"
2929
ddtrace = "^0.50.0"
3030
importlib_metadata = {version = "^1.0", python = "<3.8"}
31-
typing_extensions = {version = "^4.0", python = "<3.8"}
3231
boto3 = { version = "^1.10.33", optional = true }
32+
typing_extensions = {version = "^4.0", python = "<3.8"}
3333
requests = { version ="^2.22.0", optional = true }
3434
nose2 = { version= "^0.9.1", optional = true }
3535
flake8 = { version = "^3.7.9", optional = true }

scripts/run_integration_tests.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,9 @@ for handler_name in "${LAMBDA_HANDLERS[@]}"; do
212212
sed -E "s/(\"system\.pid\"\: )[0-9\.\-]+/\1\"XXXX\"/g" |
213213
sed -E "s/(\"runtime-id\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
214214
sed -E "s/(\"datadog_lambda\"\: \")([0-9]+\.[0-9]+\.[0-9])/\1X.X.X/g" |
215-
sed -E "s/(\"dd_trace\"\: \")([0-9]+\.[0-9]+\.[0-9])/\1X.X.X/g"
215+
sed -E "s/(\"dd_trace\"\: \")([0-9]+\.[0-9]+\.[0-9])/\1X.X.X/g" |
216+
sed -E "/init complete at epoch/d" |
217+
sed -E "/main started at epoch/d"
216218
)
217219

218220
if [ ! -f $function_snapshot_path ]; then

tests/event_samples/api-gateway-non-proxy-async.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
"X-Forwarded-For": "38.122.226.210, 70.132.52.143",
1919
"X-Forwarded-Port": "443",
2020
"X-Forwarded-Proto": "https",
21-
"X-Amz-Invocation-Type": "Event"
21+
"X-Amz-Invocation-Type": "Event",
22+
"x-datadog-trace-id": "12345",
23+
"x-datadog-parent-id": "67890",
24+
"x-datadog-sampling-priority": "2"
2225
},
2326
"multiValueHeaders": {
2427
"Accept": [

tests/event_samples/api-gateway-non-proxy.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
"X-Amzn-Trace-Id": "Root=1-613a4da3-5012576973e2e5670d4c549a",
1818
"X-Forwarded-For": "38.122.226.210, 70.132.52.143",
1919
"X-Forwarded-Port": "443",
20-
"X-Forwarded-Proto": "https"
20+
"X-Forwarded-Proto": "https",
21+
"x-datadog-trace-id": "12345",
22+
"x-datadog-parent-id": "67890",
23+
"x-datadog-sampling-priority": "2"
2124
},
2225
"multiValueHeaders": {
2326
"Accept": [

tests/event_samples/api-gateway-websocket-connect.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
"X-Amzn-Trace-Id": "Root=1-613b6b23-34ae0ce37f8d09ae19095835",
88
"X-Forwarded-For": "38.122.226.210",
99
"X-Forwarded-Port": "443",
10-
"X-Forwarded-Proto": "https"
10+
"X-Forwarded-Proto": "https",
11+
"x-datadog-trace-id": "12345",
12+
"x-datadog-parent-id": "67890",
13+
"x-datadog-sampling-priority": "2"
1114
},
1215
"multiValueHeaders": {
1316
"Host": [

tests/event_samples/api-gateway-websocket-default.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,27 @@
11
{
2+
"headers": {
3+
"Host": "p62c47itsb.execute-api.sa-east-1.amazonaws.com",
4+
"x-api-key": "",
5+
"X-Forwarded-For": "",
6+
"x-restapi": "",
7+
"x-datadog-trace-id": "12345",
8+
"x-datadog-parent-id": "67890",
9+
"x-datadog-sampling-priority": "2"
10+
},
11+
"multiValueHeaders": {
12+
"Host": [
13+
"p62c47itsb.execute-api.sa-east-1.amazonaws.com"
14+
],
15+
"x-api-key": [
16+
""
17+
],
18+
"X-Forwarded-For": [
19+
""
20+
],
21+
"x-restapi": [
22+
""
23+
]
24+
},
225
"requestContext": {
326
"routeKey": "$default",
427
"messageId": "Fc5S3coemjQCJlg=",

tests/event_samples/api-gateway-websocket-disconnect.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"Host": "p62c47itsb.execute-api.sa-east-1.amazonaws.com",
44
"x-api-key": "",
55
"X-Forwarded-For": "",
6-
"x-restapi": ""
6+
"x-restapi": "",
7+
"x-datadog-trace-id": "12345",
8+
"x-datadog-parent-id": "67890",
9+
"x-datadog-sampling-priority": "2"
710
},
811
"multiValueHeaders": {
912
"Host": [
@@ -39,4 +42,4 @@
3942
"apiId": "p62c47itsb"
4043
},
4144
"isBase64Encoded": false
42-
}
45+
}

tests/event_samples/api-gateway.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
"X-Forwarded-Port": "443",
3939
"X-Forwarded-Proto": "https",
4040
"X-Datadog-Trace-Id": "12345",
41-
"X-Datadog-Parent-Id": "67890"
41+
"X-Datadog-Parent-Id": "67890",
42+
"x-datadog-sampling-priority": "2"
4243
},
4344
"multiValueHeaders": {
4445
"Accept": [

tests/event_samples/application-load-balancer.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
"x-forwarded-for": "72.12.164.125",
2222
"x-forwarded-port": "80",
2323
"x-forwarded-proto": "http",
24-
"x-imforwards": "20"
24+
"x-imforwards": "20",
25+
"x-datadog-trace-id": "12345",
26+
"x-datadog-parent-id": "67890",
27+
"x-datadog-sampling-priority": "2"
2528
},
2629
"body": "",
2730
"isBase64Encoded": false

0 commit comments

Comments
 (0)