From bb27697a45c3444ffe3f6b6bc42d66e1c534b356 Mon Sep 17 00:00:00 2001 From: Tian Chu Date: Sun, 8 Sep 2019 00:42:12 -0400 Subject: [PATCH] Support trace and log correlation --- README.md | 30 +++++++++++++++++ datadog_lambda/tracing.py | 69 +++++++++++++++++++++++++++++++-------- datadog_lambda/wrapper.py | 18 ++++++++-- setup.py | 1 + tests/test_tracing.py | 20 ++++++++++++ tests/test_wrapper.py | 28 ++++++++++++++-- 6 files changed, 148 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 02adb028..6adbcbab 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,14 @@ If your Lambda function powers a performance-critical task (e.g., a consumer-fac - DD_FLUSH_TO_LOG +To connect logs and traces, set the environment variable below to `True`. The default format of the AWS provided `LambdaLoggerHandler` will be overridden to inject `dd.trace_id` and `dd.span_id`. The default Datadog lambda log integration pipeline will automatically parse them and map the `dd.trace_id` into the reserved [trace_id attribute](https://docs.datadoghq.com/logs/processing/#trace-id-attribute). + +- DD_LOGS_INJECTION + +To debug the Datadog Lambda Layer, set the environment variable below to `DEBUG`. + +- DD_LOG_LEVEL + ### The Serverless Framework If your Lambda function is deployed using the Serverless Framework, refer to this sample `serverless.yml`. @@ -211,6 +219,28 @@ If your Lambda function is triggered by API Gateway via [the non-proxy integrati If your Lambda function is deployed by the Serverless Framework, such a mapping template gets created by default. +## Log and Trace Correlations + +To connect logs and traces, set the environment variable `DD_LOGS_INJECTION` to `True`. The log format of the AWS provided `LambdaLoggerHandler` will be overridden to inject `dd.trace_id` and `dd.span_id`. The default Datadog lambda log integration pipeline will automatically parse them and map the `dd.trace_id` into the reserved attribute [trace_id](https://docs.datadoghq.com/logs/processing/#trace-id-attribute). + +If you use a custom logger handler to log in json, you can manually inject the ids using the helper function `get_correlation_ids`. + +```python +from datadog_lambda.wrapper import datadog_lambda_wrapper +from ddtrace.helpers import get_correlation_ids + +@datadog_lambda_wrapper +def lambda_handler(event, context): + trace_id, span_id = get_correlation_ids() + logger.info({ + "message": "hello world", + "dd": { + "trace_id": trace_id, + "span_id": span_id + } + }) +``` + ## Opening Issues If you encounter a bug with this package, we want to hear about it. Before opening a new issue, search the existing issues to avoid duplicates. diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 8c0be584..2de444a5 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -7,6 +7,7 @@ from aws_xray_sdk.core import xray_recorder +from ddtrace import patch, tracer from datadog_lambda.constants import ( SamplingPriority, TraceHeader, @@ -18,6 +19,28 @@ dd_trace_context = {} +def _convert_xray_trace_id(xray_trace_id): + """ + Convert X-Ray trace id (hex)'s last 63 bits to a Datadog trace id (int). + """ + return str(0x7FFFFFFFFFFFFFFF & int(xray_trace_id[-16:], 16)) + + +def _convert_xray_entity_id(xray_entity_id): + """ + Convert X-Ray (sub)segement id (hex) to a Datadog span id (int). + """ + return str(int(xray_entity_id, 16)) + + +def _convert_xray_sampling(xray_sampled): + """ + Convert X-Ray sampled (True/False) to its Datadog counterpart. + """ + return str(SamplingPriority.USER_KEEP) if xray_sampled \ + else str(SamplingPriority.USER_REJECT) + + def extract_dd_trace_context(event): """ Extract Datadog trace context from the Lambda `event` object. @@ -56,6 +79,8 @@ def extract_dd_trace_context(event): # reset to avoid using the context from the last invocation. dd_trace_context = {} + logger.debug('extracted dd trace context %s', dd_trace_context) + def get_dd_trace_context(): """ @@ -91,23 +116,41 @@ def get_dd_trace_context(): } -def _convert_xray_trace_id(xray_trace_id): +def set_correlation_ids(): """ - Convert X-Ray trace id (hex)'s last 63 bits to a Datadog trace id (int). - """ - return str(0x7FFFFFFFFFFFFFFF & int(xray_trace_id[-16:], 16)) + Create a dummy span, and overrides its trace_id and span_id, to make + ddtrace.helpers.get_correlation_ids() return the correct ids for both + auto and manual log correlations. - -def _convert_xray_entity_id(xray_entity_id): + TODO: Remove me when Datadog tracer is natively supported in Lambda. """ - Convert X-Ray (sub)segement id (hex) to a Datadog span id (int). - """ - return str(int(xray_entity_id, 16)) + context = get_dd_trace_context() + span = tracer.trace('dummy.span') + span.trace_id = context[TraceHeader.TRACE_ID] + span.span_id = context[TraceHeader.PARENT_ID] + logger.debug('correlation ids set') -def _convert_xray_sampling(xray_sampled): + +def inject_correlation_ids(): """ - Convert X-Ray sampled (True/False) to its Datadog counterpart. + Override the formatter of LambdaLoggerHandler to inject datadog trace and + span id for log correlation. + + For manual injections to custom log handlers, use `ddtrace.helpers.get_correlation_ids` + to retrieve correlation ids (trace_id, span_id). """ - return str(SamplingPriority.USER_KEEP) if xray_sampled \ - else str(SamplingPriority.USER_REJECT) + # Override the log format of the AWS provided LambdaLoggerHandler + root_logger = logging.getLogger() + for handler in root_logger.handlers: + if handler.__class__.__name__ == 'LambdaLoggerHandler': + handler.setFormatter(logging.Formatter( + '[%(levelname)s]\t%(asctime)s.%(msecs)dZ\t%(aws_request_id)s\t' + '[dd.trace_id=%(dd.trace_id)s dd.span_id=%(dd.span_id)s]\t%(message)s\n', + '%Y-%m-%dT%H:%M:%S' + )) + + # Patch `logging.Logger.makeRecord` to actually inject correlation ids + patch(logging=True) + + logger.debug('logs injection configured') diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py index 5381f20c..ab43115a 100644 --- a/datadog_lambda/wrapper.py +++ b/datadog_lambda/wrapper.py @@ -8,8 +8,12 @@ import traceback from datadog_lambda.metric import lambda_stats -from datadog_lambda.tracing import extract_dd_trace_context from datadog_lambda.patch import patch_all +from datadog_lambda.tracing import ( + extract_dd_trace_context, + set_correlation_ids, + inject_correlation_ids, +) logger = logging.getLogger(__name__) @@ -37,6 +41,14 @@ class _LambdaDecorator(object): def __init__(self, func): self.func = func self.flush_to_log = os.environ.get('DD_FLUSH_TO_LOG', '').lower() == 'true' + self.logs_injection = os.environ.get('DD_LOGS_INJECTION', '').lower() == 'true' + + # Inject trace correlation ids to logs + if self.logs_injection: + inject_correlation_ids() + + # Patch HTTP clients to propagate Datadog trace context + patch_all() logger.debug('datadog_lambda_wrapper initialized') def _before(self, event, context): @@ -44,8 +56,8 @@ def _before(self, event, context): # Extract Datadog trace context from incoming requests extract_dd_trace_context(event) - # Patch HTTP clients to propogate Datadog trace context - patch_all() + # Set log correlation ids using extracted trace context + set_correlation_ids() except Exception: traceback.print_exc() diff --git a/setup.py b/setup.py index bb1b8a71..c9e30978 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ install_requires=[ 'aws-xray-sdk==2.4.2', 'datadog==0.28.0', + 'ddtrace==0.28.0', 'wrapt==1.11.1', 'setuptools==40.8.0', 'boto3==1.9.160' diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 4d0776f2..30755c15 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -4,6 +4,7 @@ except ImportError: from mock import MagicMock, patch +from ddtrace.helpers import get_correlation_ids from datadog_lambda.constants import ( SamplingPriority, TraceHeader, @@ -12,6 +13,7 @@ from datadog_lambda.tracing import ( extract_dd_trace_context, get_dd_trace_context, + set_correlation_ids, _convert_xray_trace_id, _convert_xray_entity_id, _convert_xray_sampling, @@ -150,3 +152,21 @@ def test_convert_xray_sampling(self): _convert_xray_sampling(False), str(SamplingPriority.USER_REJECT) ) + + +class TestLogsInjection(unittest.TestCase): + + def setUp(self): + patcher = patch('datadog_lambda.tracing.get_dd_trace_context') + self.mock_get_dd_trace_context = patcher.start() + self.mock_get_dd_trace_context.return_value = { + TraceHeader.TRACE_ID: '123', + TraceHeader.PARENT_ID: '456', + } + self.addCleanup(patcher.stop) + + def test_set_correlation_ids(self): + set_correlation_ids() + trace_id, span_id = get_correlation_ids() + self.assertEqual(trace_id, '123') + self.assertEqual(span_id, '456') diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index 70081ebb..b1fac540 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -24,6 +24,14 @@ def setUp(self): self.mock_extract_dd_trace_context = patcher.start() self.addCleanup(patcher.stop) + patcher = patch('datadog_lambda.wrapper.set_correlation_ids') + self.mock_set_correlation_ids = patcher.start() + self.addCleanup(patcher.stop) + + patcher = patch('datadog_lambda.wrapper.inject_correlation_ids') + self.mock_inject_correlation_ids = patcher.start() + self.addCleanup(patcher.stop) + patcher = patch('datadog_lambda.wrapper.patch_all') self.mock_patch_all = patcher.start() self.addCleanup(patcher.stop) @@ -42,6 +50,8 @@ def lambda_handler(event, context): ]) self.mock_wrapper_lambda_stats.flush.assert_called() self.mock_extract_dd_trace_context.assert_called_with(lambda_event) + self.mock_set_correlation_ids.assert_called() + self.mock_inject_correlation_ids.assert_not_called() self.mock_patch_all.assert_called() def test_datadog_lambda_wrapper_flush_to_log(self): @@ -57,7 +67,21 @@ def lambda_handler(event, context): self.mock_metric_lambda_stats.distribution.assert_not_called() self.mock_wrapper_lambda_stats.flush.assert_not_called() - self.mock_extract_dd_trace_context.assert_called_with(lambda_event) - self.mock_patch_all.assert_called() del os.environ["DD_FLUSH_TO_LOG"] + + def test_datadog_lambda_wrapper_inject_correlation_ids(self): + os.environ["DD_LOGS_INJECTION"] = 'True' + + @datadog_lambda_wrapper + def lambda_handler(event, context): + lambda_metric("test.metric", 100) + + lambda_event = {} + lambda_context = {} + lambda_handler(lambda_event, lambda_context) + + self.mock_set_correlation_ids.assert_called() + self.mock_inject_correlation_ids.assert_called() + + del os.environ["DD_LOGS_INJECTION"]