Skip to content

Commit cdfa633

Browse files
committed
feat(tracing): resource renaming
1 parent 6d48bf3 commit cdfa633

File tree

7 files changed

+278
-2
lines changed

7 files changed

+278
-2
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import re
2+
from typing import List
3+
from typing import Optional
4+
5+
from ddtrace._trace.processor import SpanProcessor
6+
from ddtrace.ext import http
7+
from ddtrace.settings._config import config
8+
9+
10+
class ResourceRenamingProcessor(SpanProcessor):
    """Span processor that writes an ``http.endpoint`` tag on every finished span.

    The tag value is the span's ``http.route`` tag when one is present. When there is no route, or when
    ``config._trace_resource_renaming_always_simplified_endpoint`` is truthy, the value is instead derived from the
    ``http.url`` tag by keeping at most eight path elements and replacing identifier-like elements with
    ``{param:...}`` placeholders.
    """

    def __init__(self):
        # Splits a URL into an optional "scheme://host" prefix, a mandatory path starting with "/",
        # and an optional query string introduced by "?".
        self._URL_PATH_EXTRACTION_RE = re.compile(
            r"^(?P<protocol>[a-z]+://(?P<host>[^?/]+))?(?P<path>/[^?]*)(?P<query>(\?).*)?$"
        )

        # Two or more digits with no leading zero.
        self._INT_RE = re.compile(r"^[1-9][0-9]+$")
        # Three or more characters drawn from digits, ".", "_" and "-", at least one of them a digit.
        self._INT_ID_RE = re.compile(r"^(?=.*[0-9].*)[0-9._-]{3,}$")
        # Six or more hexadecimal characters, at least one of them a decimal digit.
        self._HEX_RE = re.compile(r"^(?=.*[0-9].*)[A-Fa-f0-9]{6,}$")
        # Six or more characters drawn from hex digits, ".", "_" and "-", at least one of them a decimal digit.
        self._HEX_ID_RE = re.compile(r"^(?=.*[0-9].*)[A-Fa-f0-9._-]{6,}$")
        # Twenty or more characters, or any element containing one of % & ' ( ) * + , : = @
        self._STR_RE = re.compile(r"^(.{20,}|.*[%&'()*+,:=@].*)$")

    def _compute_simplified_endpoint_path_element(self, elem: str) -> str:
        """Return the placeholder for a single path element, or the element itself.

        The rules are tried from most to least specific and the first full match wins; an element
        that matches none of them is returned unchanged.
        """
        rules = (
            (self._INT_RE, "{param:int}"),
            (self._INT_ID_RE, "{param:int_id}"),
            (self._HEX_RE, "{param:hex}"),
            (self._HEX_ID_RE, "{param:hex_id}"),
            (self._STR_RE, "{param:str}"),
        )
        for pattern, placeholder in rules:
            if pattern.fullmatch(elem):
                return placeholder
        return elem

    def _compute_simplified_endpoint(self, url: Optional[str]) -> str:
        """Build the simplified endpoint for ``url``.

        Returns ``"/"`` when ``url`` is empty/``None`` or its path has no non-empty element, ``""`` when
        ``url`` does not match the URL extraction pattern, and otherwise ``"/"`` followed by the first
        eight non-empty path elements, each passed through the element replacement rules.
        """
        if not url:
            return "/"

        parsed = self._URL_PATH_EXTRACTION_RE.match(url)
        if parsed is None:
            return ""

        # The "path" group is mandatory in the pattern, so it is always a string starting with "/".
        # Empty pieces (leading, trailing or repeated slashes) are dropped before applying the cap.
        kept: List[str] = [piece for piece in parsed.group("path").split("/") if piece][:8]
        if not kept:
            return "/"

        return "/" + "/".join(self._compute_simplified_endpoint_path_element(piece) for piece in kept)

    def on_span_start(self, span):
        """No work is done when a span starts."""
        pass

    def on_span_finish(self, span):
        """Set the ``http.endpoint`` tag on ``span`` from its route or its simplified URL path."""
        route = span.get_tag(http.ROUTE)

        # A known route is used verbatim unless the configuration forces the URL-derived endpoint.
        if route and not config._trace_resource_renaming_always_simplified_endpoint:
            span.set_tag_str(http.ENDPOINT, route)
            return

        simplified = self._compute_simplified_endpoint(span.get_tag(http.URL))
        span.set_tag_str(http.ENDPOINT, simplified)

ddtrace/_trace/tracer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ddtrace._trace.processor import SpanProcessor
2121
from ddtrace._trace.processor import TopLevelSpanProcessor
2222
from ddtrace._trace.processor import TraceProcessor
23+
from ddtrace._trace.processor.resource_renaming import ResourceRenamingProcessor
2324
from ddtrace._trace.provider import BaseContextProvider
2425
from ddtrace._trace.provider import DefaultContextProvider
2526
from ddtrace._trace.span import Span
@@ -74,6 +75,9 @@ def _default_span_processors_factory(
7475
span_processors: List[SpanProcessor] = []
7576
span_processors += [TopLevelSpanProcessor()]
7677

78+
if config._trace_resource_renaming_enabled:
79+
span_processors.append(ResourceRenamingProcessor())
80+
7781
# When using the NativeWriter stats are computed by the native code.
7882
if config._trace_compute_stats and not config._trace_writer_native:
7983
# Inline the import to avoid pulling in ddsketch or protobuf

ddtrace/ext/http.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
CLIENT_IP = "http.client_ip"
1919
ROUTE = "http.route"
2020
REFERRER_HOSTNAME = "http.referrer_hostname"
21+
ENDPOINT = "http.endpoint"
2122

2223
# HTTP headers
2324
REFERER_HEADER = "referer"

ddtrace/internal/processor/stats.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def _is_measured(span: Span) -> bool:
4848
str, # type
4949
int, # http status code
5050
bool, # synthetics request
51+
str, # http method
52+
str, # http endpoint
5153
]
5254

5355

@@ -73,8 +75,10 @@ def _span_aggr_key(span: Span) -> SpanAggrKey:
7375
resource = span.resource or ""
7476
_type = span.span_type or ""
7577
status_code = span.get_tag("http.status_code") or 0
78+
method = span.get_tag("http.method") or ""
79+
endpoint = span.get_tag("http.endpoint") or ""
7680
synthetics = span.context.dd_origin == "synthetics"
77-
return span.name, service, resource, _type, int(status_code), synthetics
81+
return (span.name, service, resource, _type, int(status_code), synthetics, method, endpoint)
7882

7983

8084
class SpanStatsProcessorV06(PeriodicService, SpanProcessor):
@@ -157,12 +161,14 @@ def _serialize_buckets(self) -> List[Dict]:
157161
serialized_bucket_keys.append(bucket_time_ns)
158162

159163
for aggr_key, stat_aggr in bucket.items():
160-
name, service, resource, _type, http_status, synthetics = aggr_key
164+
name, service, resource, _type, http_status, synthetics, http_method, http_endpoint = aggr_key
161165
serialized_bucket = {
162166
"Name": compat.ensure_text(name),
163167
"Resource": compat.ensure_text(resource),
164168
"Synthetics": synthetics,
165169
"HTTPStatusCode": http_status,
170+
"Method": http_method,
171+
"Endpoint": http_endpoint,
166172
"Hits": stat_aggr.hits,
167173
"TopLevelHits": stat_aggr.top_level_hits,
168174
"Duration": stat_aggr.duration,

ddtrace/settings/_config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,14 @@ def __init__(self):
661661
self._inferred_proxy_services_enabled = _get_config("DD_TRACE_INFERRED_PROXY_SERVICES_ENABLED", False, asbool)
662662
self._trace_safe_instrumentation_enabled = _get_config("DD_TRACE_SAFE_INSTRUMENTATION_ENABLED", False, asbool)
663663

664+
# Resource renaming
665+
self._trace_resource_renaming_enabled = _get_config(
666+
"DD_TRACE_RESOURCE_RENAMING", default=False, modifier=asbool
667+
)
668+
self._trace_resource_renaming_always_simplified_endpoint = _get_config(
669+
"DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT", default=False, modifier=asbool
670+
)
671+
664672
def __getattr__(self, name) -> Any:
665673
if name in self._config:
666674
return self._config[name].value()

tests/telemetry/test_writer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,8 @@ def test_app_started_event_configuration_override(test_agent_session, run_python
519519
{"name": "DD_TRACE_PROPAGATION_STYLE_INJECT", "origin": "env_var", "value": "tracecontext"},
520520
{"name": "DD_TRACE_RATE_LIMIT", "origin": "env_var", "value": 50},
521521
{"name": "DD_TRACE_REPORT_HOSTNAME", "origin": "default", "value": False},
522+
{"name": "DD_TRACE_RESOURCE_RENAMING", "origin": "default", "value": False},
523+
{"name": "DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT", "origin": "default", "value": False},
522524
{"name": "DD_TRACE_SAFE_INSTRUMENTATION_ENABLED", "origin": "default", "value": False},
523525
{
524526
"name": "DD_TRACE_SAMPLING_RULES",
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import pytest
2+
3+
from ddtrace._trace.processor.resource_renaming import ResourceRenamingProcessor
4+
from ddtrace.ext import http
5+
from ddtrace.trace import Context
6+
from ddtrace.trace import Span
7+
from tests.utils import override_global_config
8+
9+
10+
class TestResourceRenaming:
    """Unit tests for ``ResourceRenamingProcessor``: element rules, URL simplification and span tagging."""

    @pytest.mark.parametrize(
        "elem,expected",
        [
            # Integer patterns
            ("123", "{param:int}"),
            ("10", "{param:int}"),
            ("12345", "{param:int}"),
            ("0", "0"),
            ("01", "01"),
            # Integer ID patterns
            ("123.456", "{param:int_id}"),
            ("123-456-789", "{param:int_id}"),
            ("0123", "{param:int_id}"),
            # Hex patterns (require at least one digit)
            ("123ABC", "{param:hex}"),
            ("a1b2c3", "{param:hex}"),
            ("abcdef", "abcdef"),
            ("ABCDEF", "ABCDEF"),
            ("abcde", "abcde"),
            # Hex ID patterns
            ("123.ABC", "{param:hex_id}"),
            ("a1b2-c3d4", "{param:hex_id}"),
            ("abc-def", "abc-def"),
            # String patterns
            ("this_is_a_very_long_string", "{param:str}"),
            ("with%special&chars", "{param:str}"),
            ("user@domain.com", "{param:str}"),
            ("file.with.dots", "file.with.dots"),
            # No match cases
            ("users", "users"),
            ("short", "short"),
            ("xyz123", "xyz123"),
        ],
    )
    def test_compute_simplified_endpoint_path_element(self, elem, expected):
        """Each path element maps to the expected placeholder, or is left untouched."""
        processor = ResourceRenamingProcessor()
        result = processor._compute_simplified_endpoint_path_element(elem)
        assert result == expected

    @pytest.mark.parametrize(
        "url,expected",
        [
            # Basic cases
            ("", "/"),
            ("http://example.com", ""),
            ("http://example.com/", "/"),
            ("/users", "/users"),
            ("https://example.com/users", "/users"),
            # Query and fragment handling
            ("http://example.com/api/users?id=123", "/api/users"),
            ("https://example.com/users/123#section", "/users/123#section"),
            ("https://example.com/users/123?filter=active#top", "/users/{param:int}"),
            # Parameter replacement
            ("/users/123", "/users/{param:int}"),
            ("/users/5", "/users/5"),
            ("/users/0123", "/users/{param:int_id}"),
            ("/items/123-456", "/items/{param:int_id}"),
            ("/commits/abc123", "/commits/{param:hex}"),
            ("/sessions/deadbeef", "/sessions/deadbeef"),
            ("/items/abc123-def", "/items/{param:hex_id}"),
            ("/files/verylongfilename12345", "/files/{param:str}"),
            ("/users/user@example", "/users/{param:str}"),
            # Path limits and edge cases
            ("/a/b/c/d/e/f/g/h/i/j/k", "/a/b/c/d/e/f/g/h"),
            ("/api//v1///users//123", "/api/v1/users/{param:int}"),
            ("///////////////////////", "/"),
            # Complex mixed cases
            (
                "/api/v2/users/123/posts/abc123/comments/hello%20world",
                "/api/v2/users/{param:int}/posts/{param:hex}/comments/{param:str}",
            ),
            (
                "/12/123-456/abc123/abc-def-123/longstringthathastoomanycharacters",
                "/{param:int}/{param:int_id}/{param:hex}/{param:hex_id}/{param:str}",
            ),
            # Error cases
            (None, "/"),
            ("invalid-url", ""),
            ("://malformed", ""),
        ],
    )
    def test_compute_simplified_endpoint(self, url, expected):
        """Whole URLs are reduced to the expected simplified endpoint."""
        processor = ResourceRenamingProcessor()
        result = processor._compute_simplified_endpoint(url)
        assert result == expected

    def test_processor_with_route(self):
        """When a route tag exists (and the override flag is off) the route is used verbatim."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        span.set_tag(http.ROUTE, "/api/users/{id}")
        span.set_tag(http.URL, "https://example.com/api/users/123")

        processor.on_span_finish(span)
        assert span.get_tag(http.ENDPOINT) == "/api/users/{id}"

    def test_processor_without_route(self):
        """Without a route tag the endpoint is derived from the URL."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        span.set_tag(http.URL, "https://example.com/api/users/123")

        processor.on_span_finish(span)
        assert span.get_tag(http.ENDPOINT) == "/api/users/{param:int}"

    @override_global_config(dict(_trace_resource_renaming_always_simplified_endpoint=True))
    def test_processor_always_simplified_endpoint(self):
        """With the always-simplified flag on, the URL-derived endpoint wins over the route."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        span.set_tag(http.ROUTE, "/api/users/{id}")
        span.set_tag(http.URL, "https://example.com/api/users/123")

        processor.on_span_finish(span)
        # Should use simplified endpoint even when route exists.
        # NOTE(review): this relies on override_global_config actually applying
        # _trace_resource_renaming_always_simplified_endpoint; if this assertion fails with the raw
        # route, the override helper is not setting the flag and needs to learn about this key.
        assert span.get_tag(http.ENDPOINT) == "/api/users/{param:int}"

    def test_processor_no_url_no_route(self):
        """A span with neither URL nor route is tagged with the root endpoint."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())

        processor.on_span_finish(span)
        assert span.get_tag(http.ENDPOINT) == "/"

    def test_processor_empty_url(self):
        """An empty URL tag is treated like a missing URL."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        span.set_tag(http.URL, "")

        processor.on_span_finish(span)
        assert span.get_tag(http.ENDPOINT) == "/"

    def test_processor_malformed_url(self):
        """A URL that the extraction pattern cannot parse yields an empty endpoint."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        span.set_tag(http.URL, "not-a-valid-url")

        processor.on_span_finish(span)
        assert span.get_tag(http.ENDPOINT) == ""

    def test_regex_patterns(self):
        """Spot-check the compiled patterns directly."""
        processor = ResourceRenamingProcessor()

        # Integer pattern
        assert processor._INT_RE.fullmatch("123")
        assert not processor._INT_RE.fullmatch("0")
        assert not processor._INT_RE.fullmatch("01")

        # Hex pattern (requires at least one digit)
        assert processor._HEX_RE.fullmatch("123ABC")
        assert not processor._HEX_RE.fullmatch("ABCDEF")
        assert not processor._HEX_RE.fullmatch("deadbeef")

    def test_path_limit(self):
        """Paths longer than eight elements are truncated to eight."""
        processor = ResourceRenamingProcessor()
        span = Span("test", context=Context())
        long_path = "/" + "/".join([f"segment{i}" for i in range(20)])
        span.set_tag(http.URL, f"https://example.com{long_path}")
        processor.on_span_finish(span)
        endpoint = span.get_tag(http.ENDPOINT)
        segments = [s for s in endpoint.split("/") if s]
        assert len(segments) == 8

    def test_realistic_urls(self):
        """End-to-end check on a few realistic-looking URLs."""
        processor = ResourceRenamingProcessor()
        test_cases = [
            ("https://api.github.com/repos/user/repo/issues/123", "/repos/user/repo/issues/{param:int}"),
            ("https://shop.example.com/products/12345/reviews", "/products/{param:int}/reviews"),
            ("https://files.example.com/uploads/documents/verylongdocumentname", "/uploads/documents/{param:str}"),
        ]

        for url, expected in test_cases:
            span = Span("test", context=Context())
            span.set_tag(http.URL, url)
            processor.on_span_finish(span)
            assert span.get_tag(http.ENDPOINT) == expected

0 commit comments

Comments
 (0)