Skip to content

Commit b98138c

Browse files
feat(tracer): [SVLS-5672] DynamoDB PutItem pointers (#10824)
PutItem is a bit tricky since it doesn't have a separate primary key section. The primary key is mixed into the Item itself. So we need a way for our customers to identify the primary key field names for a table. We'll start by doing this with configuration, and we may add a way for the user to opt in to us making a (cached) DescribeTable call on their behalf. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 5461363 commit b98138c

File tree

7 files changed

+482
-0
lines changed

7 files changed

+482
-0
lines changed

ddtrace/_trace/trace_handlers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import wrapt
1111

12+
from ddtrace import config
1213
from ddtrace._trace._span_pointer import _SpanPointerDescription
1314
from ddtrace._trace.utils import extract_DD_context_from_messages
1415
from ddtrace._trace.utils_botocore.span_pointers import extract_span_pointers_from_successful_botocore_response
@@ -487,6 +488,7 @@ def _on_botocore_patched_api_call_success(ctx, response):
487488
set_botocore_response_metadata_tags(span, response)
488489

489490
for span_pointer_description in extract_span_pointers_from_successful_botocore_response(
491+
dynamodb_primary_key_names_for_tables=config.botocore.dynamodb_primary_key_names_for_tables,
490492
endpoint_name=ctx.get_item("endpoint_name"),
491493
operation_name=ctx.get_item("operation"),
492494
request_parameters=ctx.get_item("params"),

ddtrace/_trace/utils_botocore/span_pointers.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Dict
44
from typing import List
55
from typing import NamedTuple
6+
from typing import Set
67

78
from ddtrace._trace._span_pointer import _SpanPointerDescription
89
from ddtrace._trace._span_pointer import _SpanPointerDirection
@@ -13,7 +14,19 @@
1314
log = get_logger(__name__)
1415

1516

17+
# Aliases naming the role each plain ``str`` plays in the DynamoDB span pointer code.
_DynamoDBTableName = str
18+
_DynamoDBItemFieldName = str
19+
_DynamoDBItemTypeTag = str
20+
21+
# An attribute value maps a type tag to its payload; an item maps field names
# to attribute values.
_DynamoDBItemValue = Dict[_DynamoDBItemTypeTag, Any]
22+
_DynamoDBItem = Dict[_DynamoDBItemFieldName, _DynamoDBItemValue]
23+
24+
# Primary key variants of the aliases above: the payload is narrowed to ``str``
# and each value mapping is expected to hold exactly one type-tag entry.
_DynamoDBItemPrimaryKeyValue = Dict[_DynamoDBItemTypeTag, str] # must be length 1
25+
_DynamoDBItemPrimaryKey = Dict[_DynamoDBItemFieldName, _DynamoDBItemPrimaryKeyValue]
26+
27+
1628
def extract_span_pointers_from_successful_botocore_response(
29+
dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]],
1730
endpoint_name: str,
1831
operation_name: str,
1932
request_parameters: Dict[str, Any],
@@ -22,9 +35,148 @@ def extract_span_pointers_from_successful_botocore_response(
2235
if endpoint_name == "s3":
2336
return _extract_span_pointers_for_s3_response(operation_name, request_parameters, response)
2437

38+
if endpoint_name == "dynamodb":
39+
return _extract_span_pointers_for_dynamodb_response(
40+
dynamodb_primary_key_names_for_tables, operation_name, request_parameters
41+
)
42+
43+
return []
44+
45+
46+
def _extract_span_pointers_for_dynamodb_response(
    dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]],
    operation_name: str,
    request_parameters: Dict[str, Any],
) -> List[_SpanPointerDescription]:
    """Dispatch span pointer extraction for a DynamoDB operation.

    Only ``PutItem`` is handled; any other operation produces no pointers.

    :param dynamodb_primary_key_names_for_tables: primary key field names,
        keyed by table name.
    :param operation_name: name of the DynamoDB operation that was called.
    :param request_parameters: parameters the operation was called with.
    :returns: the span pointer descriptions for the call (possibly empty).
    """
    if operation_name != "PutItem":
        return []

    return _extract_span_pointers_for_dynamodb_putitem_response(
        dynamodb_primary_key_names_for_tables,
        request_parameters,
    )
2657

2758

59+
def _extract_span_pointers_for_dynamodb_putitem_response(
    dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]],
    request_parameters: Dict[str, Any],
) -> List[_SpanPointerDescription]:
    """Build the span pointer for the item written by a ``PutItem`` call.

    ``PutItem`` requests carry the primary key mixed into ``Item``, so the key
    field names are looked up per table in
    ``dynamodb_primary_key_names_for_tables``.

    This function never raises: any failure is logged and yields no pointers.

    :param dynamodb_primary_key_names_for_tables: primary key field names,
        keyed by table name.
    :param request_parameters: the ``PutItem`` request parameters; must
        contain ``TableName`` and ``Item``.
    :returns: a one-element list with the downstream item pointer, or an
        empty list when the table is not configured or extraction fails.
    """
    try:
        table_name = request_parameters["TableName"]
        item = request_parameters["Item"]

        primary_key_field_names = dynamodb_primary_key_names_for_tables.get(table_name)
        if primary_key_field_names is None:
            # The configuration is empty by default, so an unconfigured table
            # is the normal case rather than a failure. Skip it quietly instead
            # of emitting a warning for every PutItem call.
            log.debug(
                "no primary key names configured for DynamoDB table %s; skipping PutItem span pointer",
                table_name,
            )
            return []

        return [
            _aws_dynamodb_item_span_pointer_description(
                pointer_direction=_SpanPointerDirection.DOWNSTREAM,
                table_name=table_name,
                primary_key=_aws_dynamodb_item_primary_key_from_item(primary_key_field_names, item),
            )
        ]

    except Exception as e:
        # Span pointers are best-effort: never let a malformed request or
        # configuration break the traced call.
        log.warning(
            "failed to generate DynamoDB.PutItem span pointer: %s",
            str(e),
        )
        return []
83+
84+
85+
def _aws_dynamodb_item_primary_key_from_item(
    primary_key_field_names: Set[_DynamoDBItemFieldName],
    item: _DynamoDBItem,
) -> _DynamoDBItemPrimaryKey:
    """Pick the primary key fields out of a full DynamoDB item.

    :param primary_key_field_names: names of the one or two fields that make
        up the table's primary key.
    :param item: the item to read the key fields from.
    :returns: a mapping of each key field name to its verified value object.
    :raises ValueError: if the number of key field names is not 1 or 2, or if
        a key field fails verification.
    """
    field_count = len(primary_key_field_names)
    if field_count != 1 and field_count != 2:
        raise ValueError(f"unexpected number of primary key fields: {len(primary_key_field_names)}")

    primary_key: _DynamoDBItemPrimaryKey = {}
    for field_name in primary_key_field_names:
        primary_key[field_name] = _aws_dynamodb_extract_and_verify_primary_key_field_value_item(item, field_name)

    return primary_key
98+
99+
100+
def _aws_dynamodb_item_span_pointer_description(
    pointer_direction: _SpanPointerDirection,
    table_name: _DynamoDBTableName,
    primary_key: _DynamoDBItemPrimaryKey,
) -> _SpanPointerDescription:
    """Describe a span pointer to a single DynamoDB item.

    :param pointer_direction: direction of the pointer relative to the span.
    :param table_name: table holding the item.
    :param primary_key: the item's primary key fields and value objects.
    :returns: an ``aws.dynamodb.item`` pointer description whose hash is
        derived from the table name and primary key, with no extra attributes.
    """
    item_hash = _aws_dynamodb_item_span_pointer_hash(table_name, primary_key)

    return _SpanPointerDescription(
        pointer_kind="aws.dynamodb.item",
        pointer_direction=pointer_direction,
        pointer_hash=item_hash,
        extra_attributes={},
    )
111+
112+
113+
def _aws_dynamodb_extract_and_verify_primary_key_field_value_item(
    item: _DynamoDBItem,
    primary_key_field_name: _DynamoDBItemFieldName,
) -> _DynamoDBItemPrimaryKeyValue:
    """Read one primary key field from an item and check it is a usable key value.

    A usable value object has exactly one entry, whose type tag is ``"S"``,
    ``"N"`` or ``"B"`` and whose payload is a ``str``.

    :param item: the item to read from.
    :param primary_key_field_name: name of the key field to extract.
    :returns: a fresh single-entry mapping of the type tag to its payload.
    :raises ValueError: if the field is missing or fails any check above.
    """
    if primary_key_field_name not in item:
        raise ValueError(f"missing primary key field: {primary_key_field_name}")

    field_value = item[primary_key_field_name]
    if len(field_value) != 1:
        raise ValueError(f"primary key field {primary_key_field_name} must have exactly one value: {len(field_value)}")

    # Exactly one entry is guaranteed by the length check above.
    ((type_tag, payload),) = field_value.items()

    allowed_type_tags = ("S", "N", "B")
    if type_tag not in allowed_type_tags:
        raise ValueError(f"unexpected primary key field {primary_key_field_name} value type: {type_tag}")

    # NOTE(review): only ``str`` payloads pass this check, so a "B" key that
    # is supplied as ``bytes`` is rejected here -- confirm that is intended.
    if not isinstance(payload, str):
        raise ValueError(f"unexpected primary key field {primary_key_field_name} value data type: {type(payload)}")

    return {type_tag: payload}
133+
134+
135+
def _aws_dynamodb_item_span_pointer_hash(table_name: _DynamoDBTableName, primary_key: _DynamoDBItemPrimaryKey) -> str:
    """Compute the span pointer hash identifying one DynamoDB item.

    The hash is taken over five byte strings: the UTF-8 table name followed by
    the (field name, field value) pairs of the primary key. Pairs are ordered
    by the UTF-8 encoding of the field name, and a single-field key is padded
    with an empty second pair.

    :param table_name: table holding the item.
    :param primary_key: one or two primary key fields with their value objects.
    :returns: the hash produced by ``_standard_hashing_function``.
    :raises ValueError: if the key does not have exactly one or two fields, or
        if a value object cannot be encoded.
    """
    field_count = len(primary_key)
    if field_count != 1 and field_count != 2:
        raise ValueError(f"unexpected number of primary key fields: {len(primary_key)}")

    # Order by encoded field name so the hash does not depend on dict order.
    ordered_fields = sorted(primary_key.items(), key=lambda field: field[0].encode("utf-8"))

    key_components = []
    for field_name, value_object in ordered_fields:
        key_components.append(field_name.encode("utf-8"))
        key_components.append(_aws_dynamodb_item_encode_primary_key_value(value_object))

    # A single-field key leaves the second name/value slots empty.
    key_components.extend([b""] * (4 - len(key_components)))

    return _standard_hashing_function(table_name.encode("utf-8"), *key_components)
162+
163+
164+
def _aws_dynamodb_item_encode_primary_key_value(value_object: _DynamoDBItemPrimaryKeyValue) -> bytes:
    """Encode a primary key value object into the bytes used for hashing.

    ``"S"`` payloads are encoded as UTF-8; ``"N"`` and ``"B"`` payloads are
    encoded as ASCII.

    :param value_object: single-entry mapping of a type tag to its payload.
    :returns: the encoded payload.
    :raises ValueError: if the mapping does not have exactly one entry or the
        type tag is not ``"S"``, ``"N"`` or ``"B"``.
    """
    if len(value_object) != 1:
        raise ValueError(f"primary key value object must have exactly one field: {len(value_object)}")

    ((type_tag, payload),) = value_object.items()

    # "N" and "B" payloads are expected to arrive as ASCII-only strings.
    codec_by_type_tag = {"S": "utf-8", "N": "ascii", "B": "ascii"}
    codec = codec_by_type_tag.get(type_tag)
    if codec is None:
        raise ValueError(f"unknown primary key value type: {type_tag}")

    return payload.encode(codec)
178+
179+
28180
def _extract_span_pointers_for_s3_response(
29181
operation_name: str,
30182
request_parameters: Dict[str, Any],

ddtrace/contrib/botocore/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,37 @@
108108
109109
Default: ``128``
110110
111+
112+
.. py:data:: ddtrace.config.botocore['dynamodb_primary_key_names_for_tables']
113+
114+
This enables DynamoDB API calls to be instrumented with span pointers. Many
115+
DynamoDB API calls do not include the Item's Primary Key fields as separate
116+
values, so they need to be provided to the tracer separately. This field
117+
should be structured as a ``dict`` keyed by the table names as ``str``.
118+
Each value should be the ``set`` of primary key field names (as ``str``)
119+
for the associated table. The set must have exactly one or two elements,
120+
depending on the Table's Primary Key schema.
121+
122+
In python this would look like::
123+
124+
ddtrace.config.botocore['dynamodb_primary_key_names_for_tables'] = {
125+
'table_name': {'key1', 'key2'},
126+
'other_table': {'other_key'},
127+
}
128+
129+
Can also be enabled with the ``DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS``
130+
environment variable which is parsed as a JSON object with strings for keys
131+
and lists of strings for values.
132+
133+
This would look something like::
134+
135+
export DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS='{
136+
"table_name": ["key1", "key2"],
137+
"other_table": ["other_key"]
138+
}'
139+
140+
Default: ``{}``
141+
111142
"""
112143

113144

ddtrace/contrib/internal/botocore/patch.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
Trace queries to aws api done via botocore client
33
"""
44
import collections
5+
import json
56
import os
7+
from typing import Dict # noqa:F401
68
from typing import List # noqa:F401
79
from typing import Set # noqa:F401
810
from typing import Union # noqa:F401
@@ -59,6 +61,32 @@
5961
log = get_logger(__name__)
6062

6163

64+
def _load_dynamodb_primary_key_names_for_tables() -> Dict[str, Set[str]]:
    """Load the per-table DynamoDB primary key names from the environment.

    ``DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS`` is parsed as a JSON object
    whose keys are table names and whose values are lists of unique primary
    key field names, for example
    ``{"table_name": ["key1", "key2"], "other_table": ["other_key"]}``.

    An unset, empty or whitespace-only variable means "nothing configured".
    Loading is all-or-nothing: if any part of the value is invalid, a warning
    is logged and an empty mapping is returned.

    :returns: a mapping of table name to the set of its primary key names.
    """
    try:
        # The getenv default only covers an unset variable, so fall back
        # explicitly for a variable that is set but blank.
        encoded_table_primary_keys = os.getenv("DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS", "").strip() or "{}"
        raw_table_primary_keys = json.loads(encoded_table_primary_keys)

        if not isinstance(raw_table_primary_keys, dict):
            raise ValueError(f"expected JSON object keyed by table name: {raw_table_primary_keys}")

        table_primary_keys: Dict[str, Set[str]] = {}
        for table, primary_keys in raw_table_primary_keys.items():
            if not isinstance(table, str):
                raise ValueError(f"expected string table name: {table}")

            if not isinstance(primary_keys, list):
                raise ValueError(f"expected list of primary keys: {primary_keys}")

            # Item field names are strings, so any other type could never
            # match a field and would silently disable the table.
            if not all(isinstance(primary_key, str) for primary_key in primary_keys):
                raise ValueError(f"expected string primary key names: {primary_keys}")

            unique_primary_keys = set(primary_keys)
            if len(unique_primary_keys) != len(primary_keys):
                raise ValueError(f"expected unique primary keys: {primary_keys}")

            table_primary_keys[table] = unique_primary_keys

        return table_primary_keys

    except Exception as e:
        # Configuration problems must not prevent the integration from loading.
        log.warning("failed to load DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS: %s", e)
        return {}
88+
89+
6290
# Botocore default settings
6391
config._add(
6492
"botocore",
@@ -73,6 +101,7 @@
73101
"instrument_internals": asbool(os.getenv("DD_BOTOCORE_INSTRUMENT_INTERNALS", default=False)),
74102
"propagation_enabled": asbool(os.getenv("DD_BOTOCORE_PROPAGATION_ENABLED", default=False)),
75103
"empty_poll_enabled": asbool(os.getenv("DD_BOTOCORE_EMPTY_POLL_ENABLED", default=True)),
104+
"dynamodb_primary_key_names_for_tables": _load_dynamodb_primary_key_names_for_tables(),
76105
},
77106
)
78107

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
features:
3+
- |
4+
botocore: Adds span pointers for successful DynamoDB ``PutItem`` spans. Table Primary Keys need to be provided with the ``ddtrace.config.botocore.dynamodb_primary_key_names_for_tables`` option or the ``DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS`` environment variable.

0 commit comments

Comments
 (0)