From bb27697a45c3444ffe3f6b6bc42d66e1c534b356 Mon Sep 17 00:00:00 2001
From: Tian Chu <tian.chu@datadoghq.com>
Date: Sun, 8 Sep 2019 00:42:12 -0400
Subject: [PATCH] Support trace and log correlation

---
 README.md                 | 30 +++++++++++++++++
 datadog_lambda/tracing.py | 69 +++++++++++++++++++++++++++++++--------
 datadog_lambda/wrapper.py | 18 ++++++++--
 setup.py                  |  1 +
 tests/test_tracing.py     | 20 ++++++++++++
 tests/test_wrapper.py     | 28 ++++++++++++++--
 6 files changed, 148 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 02adb028..6adbcbab 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,14 @@ If your Lambda function powers a performance-critical task (e.g., a consumer-fac
 
 - DD_FLUSH_TO_LOG
 
+To connect logs and traces, set the environment variable below to `True`. The default format of the AWS provided `LambdaLoggerHandler` will be overridden to inject `dd.trace_id` and `dd.span_id`. The default Datadog lambda log integration pipeline will automatically parse them and map the `dd.trace_id` into the reserved [trace_id attribute](https://docs.datadoghq.com/logs/processing/#trace-id-attribute).
+
+- DD_LOGS_INJECTION
+
+To debug the Datadog Lambda Layer, set the environment variable below to `DEBUG`.
+
+- DD_LOG_LEVEL
+
 ### The Serverless Framework
 
 If your Lambda function is deployed using the Serverless Framework, refer to this sample `serverless.yml`.
@@ -211,6 +219,28 @@ If your Lambda function is triggered by API Gateway via [the non-proxy integrati
 
 If your Lambda function is deployed by the Serverless Framework, such a mapping template gets created by default.
 
+## Log and Trace Correlations
+
+To connect logs and traces, set the environment variable `DD_LOGS_INJECTION` to `True`. The log format of the AWS provided `LambdaLoggerHandler` will be overridden to inject `dd.trace_id` and `dd.span_id`. The default Datadog lambda log integration pipeline will automatically parse them and map the `dd.trace_id` into the reserved attribute [trace_id](https://docs.datadoghq.com/logs/processing/#trace-id-attribute).
+
+If you use a custom logger handler to log in json, you can manually inject the ids using the helper function `get_correlation_ids`.
+
+```python
+from datadog_lambda.wrapper import datadog_lambda_wrapper
+from ddtrace.helpers import get_correlation_ids
+
+@datadog_lambda_wrapper
+def lambda_handler(event, context):
+  trace_id, span_id = get_correlation_ids()
+  logger.info({
+    "message": "hello world",
+    "dd": {
+      "trace_id": trace_id,
+      "span_id": span_id
+    }
+  })
+```
+
 ## Opening Issues
 
 If you encounter a bug with this package, we want to hear about it. Before opening a new issue, search the existing issues to avoid duplicates.
diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py
index 8c0be584..2de444a5 100644
--- a/datadog_lambda/tracing.py
+++ b/datadog_lambda/tracing.py
@@ -7,6 +7,7 @@
 
 from aws_xray_sdk.core import xray_recorder
 
+from ddtrace import patch, tracer
 from datadog_lambda.constants import (
     SamplingPriority,
     TraceHeader,
@@ -18,6 +19,28 @@
 dd_trace_context = {}
 
 
+def _convert_xray_trace_id(xray_trace_id):
+    """
+    Convert X-Ray trace id (hex)'s last 63 bits to a Datadog trace id (int).
+    """
+    return str(0x7FFFFFFFFFFFFFFF & int(xray_trace_id[-16:], 16))
+
+
+def _convert_xray_entity_id(xray_entity_id):
+    """
+    Convert X-Ray (sub)segement id (hex) to a Datadog span id (int).
+    """
+    return str(int(xray_entity_id, 16))
+
+
+def _convert_xray_sampling(xray_sampled):
+    """
+    Convert X-Ray sampled (True/False) to its Datadog counterpart.
+    """
+    return str(SamplingPriority.USER_KEEP) if xray_sampled \
+        else str(SamplingPriority.USER_REJECT)
+
+
 def extract_dd_trace_context(event):
     """
     Extract Datadog trace context from the Lambda `event` object.
@@ -56,6 +79,8 @@ def extract_dd_trace_context(event):
         # reset to avoid using the context from the last invocation.
         dd_trace_context = {}
 
+    logger.debug('extracted dd trace context %s', dd_trace_context)
+
 
 def get_dd_trace_context():
     """
@@ -91,23 +116,41 @@ def get_dd_trace_context():
         }
 
 
-def _convert_xray_trace_id(xray_trace_id):
+def set_correlation_ids():
     """
-    Convert X-Ray trace id (hex)'s last 63 bits to a Datadog trace id (int).
-    """
-    return str(0x7FFFFFFFFFFFFFFF & int(xray_trace_id[-16:], 16))
+    Create a dummy span, and overrides its trace_id and span_id, to make
+    ddtrace.helpers.get_correlation_ids() return the correct ids for both
+    auto and manual log correlations.
 
-
-def _convert_xray_entity_id(xray_entity_id):
+    TODO: Remove me when Datadog tracer is natively supported in Lambda.
     """
-    Convert X-Ray (sub)segement id (hex) to a Datadog span id (int).
-    """
-    return str(int(xray_entity_id, 16))
+    context = get_dd_trace_context()
+    span = tracer.trace('dummy.span')
+    span.trace_id = context[TraceHeader.TRACE_ID]
+    span.span_id = context[TraceHeader.PARENT_ID]
 
+    logger.debug('correlation ids set')
 
-def _convert_xray_sampling(xray_sampled):
+
+def inject_correlation_ids():
     """
-    Convert X-Ray sampled (True/False) to its Datadog counterpart.
+    Override the formatter of LambdaLoggerHandler to inject datadog trace and
+    span id for log correlation.
+
+    For manual injections to custom log handlers, use `ddtrace.helpers.get_correlation_ids`
+    to retrieve correlation ids (trace_id, span_id).
     """
-    return str(SamplingPriority.USER_KEEP) if xray_sampled \
-        else str(SamplingPriority.USER_REJECT)
+    # Override the log format of the AWS provided LambdaLoggerHandler
+    root_logger = logging.getLogger()
+    for handler in root_logger.handlers:
+        if handler.__class__.__name__ == 'LambdaLoggerHandler':
+            handler.setFormatter(logging.Formatter(
+                '[%(levelname)s]\t%(asctime)s.%(msecs)dZ\t%(aws_request_id)s\t'
+                '[dd.trace_id=%(dd.trace_id)s dd.span_id=%(dd.span_id)s]\t%(message)s\n',
+                '%Y-%m-%dT%H:%M:%S'
+            ))
+
+    # Patch `logging.Logger.makeRecord` to actually inject correlation ids
+    patch(logging=True)
+
+    logger.debug('logs injection configured')
diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py
index 5381f20c..ab43115a 100644
--- a/datadog_lambda/wrapper.py
+++ b/datadog_lambda/wrapper.py
@@ -8,8 +8,12 @@
 import traceback
 
 from datadog_lambda.metric import lambda_stats
-from datadog_lambda.tracing import extract_dd_trace_context
 from datadog_lambda.patch import patch_all
+from datadog_lambda.tracing import (
+    extract_dd_trace_context,
+    set_correlation_ids,
+    inject_correlation_ids,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -37,6 +41,14 @@ class _LambdaDecorator(object):
     def __init__(self, func):
         self.func = func
         self.flush_to_log = os.environ.get('DD_FLUSH_TO_LOG', '').lower() == 'true'
+        self.logs_injection = os.environ.get('DD_LOGS_INJECTION', '').lower() == 'true'
+
+        # Inject trace correlation ids to logs
+        if self.logs_injection:
+            inject_correlation_ids()
+
+        # Patch HTTP clients to propagate Datadog trace context
+        patch_all()
         logger.debug('datadog_lambda_wrapper initialized')
 
     def _before(self, event, context):
@@ -44,8 +56,8 @@ def _before(self, event, context):
             # Extract Datadog trace context from incoming requests
             extract_dd_trace_context(event)
 
-            # Patch HTTP clients to propogate Datadog trace context
-            patch_all()
+            # Set log correlation ids using extracted trace context
+            set_correlation_ids()
         except Exception:
             traceback.print_exc()
 
diff --git a/setup.py b/setup.py
index bb1b8a71..c9e30978 100644
--- a/setup.py
+++ b/setup.py
@@ -29,6 +29,7 @@
     install_requires=[
         'aws-xray-sdk==2.4.2',
         'datadog==0.28.0',
+        'ddtrace==0.28.0',
         'wrapt==1.11.1',
         'setuptools==40.8.0',
         'boto3==1.9.160'
diff --git a/tests/test_tracing.py b/tests/test_tracing.py
index 4d0776f2..30755c15 100644
--- a/tests/test_tracing.py
+++ b/tests/test_tracing.py
@@ -4,6 +4,7 @@
 except ImportError:
     from mock import MagicMock, patch
 
+from ddtrace.helpers import get_correlation_ids
 from datadog_lambda.constants import (
     SamplingPriority,
     TraceHeader,
@@ -12,6 +13,7 @@
 from datadog_lambda.tracing import (
     extract_dd_trace_context,
     get_dd_trace_context,
+    set_correlation_ids,
     _convert_xray_trace_id,
     _convert_xray_entity_id,
     _convert_xray_sampling,
@@ -150,3 +152,21 @@ def test_convert_xray_sampling(self):
             _convert_xray_sampling(False),
             str(SamplingPriority.USER_REJECT)
         )
+
+
+class TestLogsInjection(unittest.TestCase):
+
+    def setUp(self):
+        patcher = patch('datadog_lambda.tracing.get_dd_trace_context')
+        self.mock_get_dd_trace_context = patcher.start()
+        self.mock_get_dd_trace_context.return_value = {
+            TraceHeader.TRACE_ID: '123',
+            TraceHeader.PARENT_ID: '456',
+        }
+        self.addCleanup(patcher.stop)
+
+    def test_set_correlation_ids(self):
+        set_correlation_ids()
+        trace_id, span_id = get_correlation_ids()
+        self.assertEqual(trace_id, '123')
+        self.assertEqual(span_id, '456')
diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 70081ebb..b1fac540 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -24,6 +24,14 @@ def setUp(self):
         self.mock_extract_dd_trace_context = patcher.start()
         self.addCleanup(patcher.stop)
 
+        patcher = patch('datadog_lambda.wrapper.set_correlation_ids')
+        self.mock_set_correlation_ids = patcher.start()
+        self.addCleanup(patcher.stop)
+
+        patcher = patch('datadog_lambda.wrapper.inject_correlation_ids')
+        self.mock_inject_correlation_ids = patcher.start()
+        self.addCleanup(patcher.stop)
+
         patcher = patch('datadog_lambda.wrapper.patch_all')
         self.mock_patch_all = patcher.start()
         self.addCleanup(patcher.stop)
@@ -42,6 +50,8 @@ def lambda_handler(event, context):
         ])
         self.mock_wrapper_lambda_stats.flush.assert_called()
         self.mock_extract_dd_trace_context.assert_called_with(lambda_event)
+        self.mock_set_correlation_ids.assert_called()
+        self.mock_inject_correlation_ids.assert_not_called()
         self.mock_patch_all.assert_called()
 
     def test_datadog_lambda_wrapper_flush_to_log(self):
@@ -57,7 +67,21 @@ def lambda_handler(event, context):
 
         self.mock_metric_lambda_stats.distribution.assert_not_called()
         self.mock_wrapper_lambda_stats.flush.assert_not_called()
-        self.mock_extract_dd_trace_context.assert_called_with(lambda_event)
-        self.mock_patch_all.assert_called()
 
         del os.environ["DD_FLUSH_TO_LOG"]
+
+    def test_datadog_lambda_wrapper_inject_correlation_ids(self):
+        os.environ["DD_LOGS_INJECTION"] = 'True'
+
+        @datadog_lambda_wrapper
+        def lambda_handler(event, context):
+            lambda_metric("test.metric", 100)
+
+        lambda_event = {}
+        lambda_context = {}
+        lambda_handler(lambda_event, lambda_context)
+
+        self.mock_set_correlation_ids.assert_called()
+        self.mock_inject_correlation_ids.assert_called()
+
+        del os.environ["DD_LOGS_INJECTION"]