Skip to content

Commit 3f7c439

Browse files
authored
Use sha256 to hash StepFunctions trace id and manually set _dd.p.tid (#490)
1 parent 32c454d commit 3f7c439

File tree

2 files changed

+59
-28
lines changed

2 files changed

+59
-28
lines changed

datadog_lambda/tracing.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@
7272
propagator = HTTPPropagator()
7373

7474
DD_TRACE_JAVA_TRACE_ID_PADDING = "00000000"
75+
HIGHER_64_BITS = "HIGHER_64_BITS"
76+
LOWER_64_BITS = "LOWER_64_BITS"
7577

7678

7779
def _convert_xray_trace_id(xray_trace_id):
@@ -354,14 +356,16 @@ def extract_context_from_kinesis_event(event, lambda_context):
354356
return extract_context_from_lambda_context(lambda_context)
355357

356358

357-
def _deterministic_md5_hash(s: str) -> int:
358-
"""MD5 here is to generate trace_id, not for any encryption."""
359-
hex_number = hashlib.md5(s.encode("ascii")).hexdigest()
360-
binary = bin(int(hex_number, 16))
361-
binary_str = str(binary)
362-
binary_str_remove_0b = binary_str[2:].rjust(128, "0")
363-
most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64]
364-
result = int(most_significant_64_bits_without_leading_1, 2)
359+
def _deterministic_sha256_hash(s: str, part: str) -> int:
    """Derive a deterministic, non-zero 63-bit integer from the SHA-256 of ``s``.

    Only the first 128 bits of the digest are considered; they are treated
    as two big-endian 64-bit words. SHA-256 is used here to produce stable
    IDs from a string, not for any cryptographic purpose.

    Args:
        s: The string to hash.
        part: ``HIGHER_64_BITS`` selects the first 64-bit word of the digest;
            any other value (normally ``LOWER_64_BITS``) selects the second.

    Returns:
        The selected word with its most significant bit cleared, or ``1``
        when that value would otherwise be ``0``.
    """
    digest = hashlib.sha256(s.encode()).digest()
    # Bytes 0-7 are the higher word, bytes 8-15 the lower word of the
    # leading 128 bits; the remaining digest bytes are ignored.
    word = digest[:8] if part == HIGHER_64_BITS else digest[8:16]
    # Force the leading bit to zero so the result always has at most 63
    # significant bits.
    result = int.from_bytes(word, "big") & 0x7FFFFFFFFFFFFFFF
    # Zero is never returned; it is mapped to 1 instead.
    return result or 1
@@ -376,13 +380,27 @@ def extract_context_from_step_functions(event, lambda_context):
376380
execution_id = event.get("Execution").get("Id")
377381
state_name = event.get("State").get("Name")
378382
state_entered_time = event.get("State").get("EnteredTime")
379-
trace_id = _deterministic_md5_hash(execution_id)
380-
parent_id = _deterministic_md5_hash(
381-
f"{execution_id}#{state_name}#{state_entered_time}"
383+
# returning 128 bits since the 128-bit traceId will be broken up into the
384+
# traditional traceId and _dd.p.tid tag
385+
# https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240
386+
trace_id = _deterministic_sha256_hash(execution_id, LOWER_64_BITS)
387+
388+
parent_id = _deterministic_sha256_hash(
389+
f"{execution_id}#{state_name}#{state_entered_time}", HIGHER_64_BITS
382390
)
391+
383392
sampling_priority = SamplingPriority.AUTO_KEEP
384393
return Context(
385-
trace_id=trace_id, span_id=parent_id, sampling_priority=sampling_priority
394+
trace_id=trace_id,
395+
span_id=parent_id,
396+
sampling_priority=sampling_priority,
397+
# take the higher 64 bits as _dd.p.tid tag and use hex to encode
398+
# [2:] to remove '0x' in the hex str
399+
meta={
400+
"_dd.p.tid": hex(
401+
_deterministic_sha256_hash(execution_id, HIGHER_64_BITS)
402+
)[2:]
403+
},
386404
)
387405
except Exception as e:
388406
logger.debug("The Step Functions trace extractor returned with error %s", e)
@@ -1246,9 +1264,9 @@ def create_function_execution_span(
12461264
"function_version": function_version,
12471265
"request_id": context.aws_request_id,
12481266
"resource_names": context.function_name,
1249-
"functionname": context.function_name.lower()
1250-
if context.function_name
1251-
else None,
1267+
"functionname": (
1268+
context.function_name.lower() if context.function_name else None
1269+
),
12521270
"datadog_lambda": datadog_lambda_version,
12531271
"dd_trace": ddtrace_version,
12541272
"span.name": "aws.lambda",

tests/test_tracing.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
XraySubsegment,
2121
)
2222
from datadog_lambda.tracing import (
23-
_deterministic_md5_hash,
23+
HIGHER_64_BITS,
24+
LOWER_64_BITS,
25+
_deterministic_sha256_hash,
2426
create_inferred_span,
2527
extract_dd_trace_context,
2628
create_dd_dummy_metadata_subsegment,
@@ -624,17 +626,19 @@ def test_step_function_trace_data(self):
624626
ctx, source, event_source = extract_dd_trace_context(sqs_event, lambda_ctx)
625627
self.assertEqual(source, "event")
626628
expected_context = Context(
627-
trace_id=1074655265866231755,
628-
span_id=4776286484851030060,
629+
trace_id=3675572987363469717,
630+
span_id=6880978411788117524,
629631
sampling_priority=1,
632+
meta={"_dd.p.tid": "e987c84b36b11ab"},
630633
)
631634
self.assertEqual(ctx, expected_context)
632635
self.assertEqual(
633636
get_dd_trace_context(),
634637
{
635-
TraceHeader.TRACE_ID: "1074655265866231755",
636-
TraceHeader.PARENT_ID: fake_xray_header_value_parent_decimal,
638+
TraceHeader.TRACE_ID: "3675572987363469717",
639+
TraceHeader.PARENT_ID: "10713633173203262661",
637640
TraceHeader.SAMPLING_PRIORITY: "1",
641+
"x-datadog-tags": "_dd.p.tid=e987c84b36b11ab",
638642
},
639643
)
640644
create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY)
@@ -1992,19 +1996,28 @@ def test_mark_trace_as_error_for_5xx_responses_sends_error_metric_and_set_error_
19921996

19931997
class TestStepFunctionsTraceContext(unittest.TestCase):
19941998
def test_deterministic_m5_hash(self):
1995-
result = _deterministic_md5_hash("some_testing_random_string")
1996-
self.assertEqual(2251275791555400689, result)
1999+
result = _deterministic_sha256_hash("some_testing_random_string", LOWER_64_BITS)
2000+
self.assertEqual(7456137785171041414, result)
2001+
2002+
def test_deterministic_m5_hash__result_the_same_as_backend_1(self):
2003+
result = _deterministic_sha256_hash(
2004+
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
2005+
"#lambda#1",
2006+
HIGHER_64_BITS,
2007+
)
2008+
self.assertEqual(3711631873188331089, result)
19972009

1998-
def test_deterministic_m5_hash__result_the_same_as_backend(self):
1999-
result = _deterministic_md5_hash(
2000-
"arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a"
2001-
":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z"
2010+
def test_deterministic_m5_hash__result_the_same_as_backend_2(self):
2011+
result = _deterministic_sha256_hash(
2012+
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
2013+
"#lambda#2",
2014+
HIGHER_64_BITS,
20022015
)
2003-
self.assertEqual(8034507082463708833, result)
2016+
self.assertEqual(5759173372325510050, result)
20042017

20052018
def test_deterministic_m5_hash__always_leading_with_zero(self):
20062019
for i in range(100):
2007-
result = _deterministic_md5_hash(str(i))
2020+
result = _deterministic_sha256_hash(str(i), 64)
20082021
result_in_binary = bin(int(result))
20092022
# Leading zeros will be omitted, so only test for full 64 bits present
20102023
if len(result_in_binary) == 66: # "0b" + 64 bits.

0 commit comments

Comments
 (0)