diff --git a/ci/input_files/build.yaml.tpl b/ci/input_files/build.yaml.tpl
index 57f9ff0e..97e5987f 100644
--- a/ci/input_files/build.yaml.tpl
+++ b/ci/input_files/build.yaml.tpl
@@ -97,7 +97,7 @@ integration-test ({{ $runtime.name }}-{{ $runtime.arch }}):
   before_script:
     - *install-node
     - EXTERNAL_ID_NAME=integration-test-externalid ROLE_TO_ASSUME=sandbox-integration-test-deployer AWS_ACCOUNT=425362996713 source ./ci/get_secrets.sh
-    - yarn global add serverless --prefix /usr/local
+    - yarn global add serverless@^3.38.0 --prefix /usr/local
     - cd integration_tests && yarn install && cd ..
   script:
     - RUNTIME_PARAM={{ $runtime.python_version }} ARCH={{ $runtime.arch }} ./scripts/run_integration_tests.sh
diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py
index 73ac1e5c..64a19ccd 100644
--- a/datadog_lambda/tracing.py
+++ b/datadog_lambda/tracing.py
@@ -72,6 +72,8 @@
 propagator = HTTPPropagator()
 
 DD_TRACE_JAVA_TRACE_ID_PADDING = "00000000"
+HIGHER_64_BITS = "HIGHER_64_BITS"
+LOWER_64_BITS = "LOWER_64_BITS"
 
 
 def _convert_xray_trace_id(xray_trace_id):
@@ -354,14 +356,16 @@ def extract_context_from_kinesis_event(event, lambda_context):
     return extract_context_from_lambda_context(lambda_context)
 
 
-def _deterministic_md5_hash(s: str) -> int:
-    """MD5 here is to generate trace_id, not for any encryption."""
-    hex_number = hashlib.md5(s.encode("ascii")).hexdigest()
-    binary = bin(int(hex_number, 16))
-    binary_str = str(binary)
-    binary_str_remove_0b = binary_str[2:].rjust(128, "0")
-    most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64]
-    result = int(most_significant_64_bits_without_leading_1, 2)
+def _deterministic_sha256_hash(s: str, part: str) -> int:
+    """Return a non-zero 63-bit int from the first 128 bits of the SHA-256 of s."""
+    sha256_hash = hashlib.sha256(s.encode()).hexdigest()
+    # Drop the '0b' prefix and zfill to 256 bits; only the first 128 bits are used
+    binary_hash = bin(int(sha256_hash, 16))[2:].zfill(256)
+    if part == HIGHER_64_BITS:
+        updated_binary_hash = "0" + binary_hash[1:64]
+    else:
+        updated_binary_hash = "0" + binary_hash[65:128]
+    result = int(updated_binary_hash, 2)
     if result == 0:
         return 1
     return result
@@ -376,13 +380,27 @@ def extract_context_from_step_functions(event, lambda_context):
         execution_id = event.get("Execution").get("Id")
         state_name = event.get("State").get("Name")
         state_entered_time = event.get("State").get("EnteredTime")
-        trace_id = _deterministic_md5_hash(execution_id)
-        parent_id = _deterministic_md5_hash(
-            f"{execution_id}#{state_name}#{state_entered_time}"
+        # returning 128 bits since the 128-bit traceId will be broken up into
+        # the traditional traceId and the _dd.p.tid tag
+        # https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240
+        trace_id = _deterministic_sha256_hash(execution_id, LOWER_64_BITS)
+
+        parent_id = _deterministic_sha256_hash(
+            f"{execution_id}#{state_name}#{state_entered_time}", HIGHER_64_BITS
         )
+
         sampling_priority = SamplingPriority.AUTO_KEEP
         return Context(
-            trace_id=trace_id, span_id=parent_id, sampling_priority=sampling_priority
+            trace_id=trace_id,
+            span_id=parent_id,
+            sampling_priority=sampling_priority,
+            # take the higher 64 bits as the _dd.p.tid tag and hex-encode them
+            # [2:] removes the '0x' prefix from the hex string
+            meta={
+                "_dd.p.tid": hex(
+                    _deterministic_sha256_hash(execution_id, HIGHER_64_BITS)
+                )[2:]
+            },
         )
     except Exception as e:
         logger.debug("The Step Functions trace extractor returned with error %s", e)
@@ -1246,9 +1264,9 @@ def create_function_execution_span(
             "function_version": function_version,
             "request_id": context.aws_request_id,
             "resource_names": context.function_name,
-            "functionname": context.function_name.lower()
-            if context.function_name
-            else None,
+            "functionname": (
+                context.function_name.lower() if context.function_name else None
+            ),
             "datadog_lambda": datadog_lambda_version,
             "dd_trace": ddtrace_version,
             "span.name": "aws.lambda",
diff --git a/tests/test_tracing.py b/tests/test_tracing.py
index a810eb8a..b94e968f 100644
--- a/tests/test_tracing.py
+++ b/tests/test_tracing.py
@@ -20,7 +20,9 @@
     XraySubsegment,
 )
 from datadog_lambda.tracing import (
-    _deterministic_md5_hash,
+    HIGHER_64_BITS,
+    LOWER_64_BITS,
+    _deterministic_sha256_hash,
     create_inferred_span,
     extract_dd_trace_context,
     create_dd_dummy_metadata_subsegment,
@@ -624,17 +626,19 @@ def test_step_function_trace_data(self):
         ctx, source, event_source = extract_dd_trace_context(sqs_event, lambda_ctx)
         self.assertEqual(source, "event")
         expected_context = Context(
-            trace_id=1074655265866231755,
-            span_id=4776286484851030060,
+            trace_id=3675572987363469717,
+            span_id=6880978411788117524,
             sampling_priority=1,
+            meta={"_dd.p.tid": "e987c84b36b11ab"},
         )
         self.assertEqual(ctx, expected_context)
         self.assertEqual(
             get_dd_trace_context(),
             {
-                TraceHeader.TRACE_ID: "1074655265866231755",
-                TraceHeader.PARENT_ID: fake_xray_header_value_parent_decimal,
+                TraceHeader.TRACE_ID: "3675572987363469717",
+                TraceHeader.PARENT_ID: "10713633173203262661",
                 TraceHeader.SAMPLING_PRIORITY: "1",
+                "x-datadog-tags": "_dd.p.tid=e987c84b36b11ab",
             },
         )
         create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY)
@@ -1992,19 +1996,28 @@ def test_mark_trace_as_error_for_5xx_responses_sends_error_metric_and_set_error_
 
 class TestStepFunctionsTraceContext(unittest.TestCase):
     def test_deterministic_m5_hash(self):
-        result = _deterministic_md5_hash("some_testing_random_string")
-        self.assertEqual(2251275791555400689, result)
+        result = _deterministic_sha256_hash("some_testing_random_string", LOWER_64_BITS)
+        self.assertEqual(7456137785171041414, result)
+
+    def test_deterministic_m5_hash__result_the_same_as_backend_1(self):
+        result = _deterministic_sha256_hash(
+            "arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
+            "#lambda#1",
+            HIGHER_64_BITS,
+        )
+        self.assertEqual(3711631873188331089, result)
 
-    def test_deterministic_m5_hash__result_the_same_as_backend(self):
-        result = _deterministic_md5_hash(
-            "arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a"
-            ":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z"
+    def test_deterministic_m5_hash__result_the_same_as_backend_2(self):
+        result = _deterministic_sha256_hash(
+            "arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
+            "#lambda#2",
+            HIGHER_64_BITS,
         )
-        self.assertEqual(8034507082463708833, result)
+        self.assertEqual(5759173372325510050, result)
 
     def test_deterministic_m5_hash__always_leading_with_zero(self):
         for i in range(100):
-            result = _deterministic_md5_hash(str(i))
+            result = _deterministic_sha256_hash(str(i), LOWER_64_BITS)
             result_in_binary = bin(int(result))
             # Leading zeros will be omitted, so only test for full 64 bits present
             if len(result_in_binary) == 66:  # "0b" + 64 bits.