Skip to content

Commit a1a885d

Browse files
committed
feat(tracing): resource renaming
1 parent 6d48bf3 commit a1a885d

File tree

5 files changed

+88
-2
lines changed

5 files changed

+88
-2
lines changed

ddtrace/_trace/processor/__init__.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from collections import defaultdict
33
from itertools import chain
44
import logging
5+
import re
56
from threading import RLock
67
from typing import Any
78
from typing import DefaultDict
@@ -15,6 +16,7 @@
1516
from ddtrace._trace.span import Span
1617
from ddtrace._trace.span import _get_64_highest_order_bits_as_hex
1718
from ddtrace.constants import _APM_ENABLED_METRIC_KEY as MK_APM_ENABLED
19+
from ddtrace.ext import http
1820
from ddtrace.internal import gitmetadata
1921
from ddtrace.internal import telemetry
2022
from ddtrace.internal.constants import COMPONENT
@@ -497,3 +499,68 @@ def reset(
497499
"spans_created": defaultdict(int),
498500
"spans_finished": defaultdict(int),
499501
}
502+
503+
504+
class ResourceRenamingProcessor(SpanProcessor):
    """Span processor that sets the ``http.endpoint`` tag on finished spans.

    The tag value is the span's ``http.route`` tag when one is present;
    otherwise (or when ``config._trace_resource_renaming_always_simplified_endpoint``
    is truthy) it is a simplified form of the path taken from the span's
    ``http.url`` tag, in which path elements that look like identifiers are
    replaced by ``{param:...}`` placeholders.
    """

    def __init__(self):
        # Splits a URL into an optional "<protocol>://<host>" prefix, a path
        # starting with "/" (everything up to the first "?"), and an optional
        # query string. Only the "path" group is used by this class.
        self._URL_PATH_EXTRACTION_RE = re.compile(
            r"^(?P<protocol>[a-z]+://(?P<host>[^?/]+))?(?P<path>/[^?]*)(?P<query>(\?).*)?$"
        )

        # Path-element classifiers. They are tried in this order by
        # _compute_simplified_endpoint_path_element and the first match wins.
        # Two or more digits, not starting with 0.
        self._INT_RE = re.compile(r"^[1-9][0-9]+$")
        # Three or more characters drawn from digits and the separators "._-".
        self._INT_ID_RE = re.compile(r"^[0-9._-]{3,}$")
        # Six or more hexadecimal characters.
        self._HEX_RE = re.compile(r"^[A-Fa-f0-9]{6,}$")
        # Six or more characters drawn from hex characters and "._-".
        self._HEX_ID_RE = re.compile(r"^[A-Fa-f0-9._-]{6,}$")
        # Twenty or more characters, or any element containing one of %&'()*+,:=@
        self._STR_RE = re.compile(r"^(.{20,}|.*[%&'()*+,:=@].*)$")
        # NOTE(review): none of the id/hex patterns require a digit, so elements
        # such as "---" or "facade" are classified as int_id / hex. Confirm
        # against the resource-renaming spec whether a digit should be mandatory.

    def _compute_simplified_endpoint_path_element(self, elem: str) -> str:
        """Applies the parameter replacement rules to a single path element.

        :param elem: One non-empty segment of a URL path (no "/" inside).
        :returns: The ``{param:...}`` placeholder of the first rule that matches
            the whole element, or ``elem`` unchanged when no rule matches.
        """
        # Order matters: the patterns overlap (every _INT_RE match of 3+ chars
        # also matches _INT_ID_RE, etc.), so the most specific rule goes first.
        if self._INT_RE.fullmatch(elem):
            return "{param:int}"
        if self._INT_ID_RE.fullmatch(elem):
            return "{param:int_id}"
        if self._HEX_RE.fullmatch(elem):
            return "{param:hex}"
        if self._HEX_ID_RE.fullmatch(elem):
            return "{param:hex_id}"
        if self._STR_RE.fullmatch(elem):
            return "{param:str}"
        return elem

    def _compute_simplified_endpoint(self, url: Optional[str]) -> str:
        """Extracts and simplifies the path from an HTTP URL according to the RFC.

        :param url: Full URL or bare path; may be ``None`` or empty.
        :returns: ``"/"`` when ``url`` is empty or its path has no non-empty
            element; ``""`` when ``url`` does not match the extraction pattern;
            otherwise ``"/"`` followed by at most eight simplified path elements
            joined by ``"/"``.
        """
        if not url:
            return "/"

        match = self._URL_PATH_EXTRACTION_RE.match(url)
        if not match:
            # NOTE(review): unparseable URLs yield "" while missing URLs yield
            # "/" -- confirm this asymmetry is intended.
            return ""
        path = match.group("path")
        if not path or path == "/":
            return "/"

        # Keep only the first eight non-empty elements; empty ones (produced by
        # leading, trailing or doubled slashes) are dropped.
        elements: List[str] = []
        for part in path.split("/"):
            if part:
                elements.append(part)
                if len(elements) >= 8:
                    break

        if not elements:
            return "/"

        elements = [self._compute_simplified_endpoint_path_element(elem) for elem in elements]
        return "/" + "/".join(elements)

    def on_span_start(self, span):
        """No-op: the endpoint is only computed when the span finishes."""
        pass

    def on_span_finish(self, span):
        """Sets the ``http.endpoint`` tag on ``span``.

        Uses the ``http.route`` tag as-is when it is set, unless
        ``config._trace_resource_renaming_always_simplified_endpoint`` forces
        the simplified endpoint derived from the ``http.url`` tag.
        """
        route = span.get_tag(http.ROUTE)

        if not route or config._trace_resource_renaming_always_simplified_endpoint:
            url = span.get_tag(http.URL)
            endpoint = self._compute_simplified_endpoint(url)
        else:
            # The route is already a low-cardinality template; use it directly.
            # (Previously this branch read the unassigned local ``endpoint`` and
            # raised UnboundLocalError for every span carrying a route.)
            endpoint = route
        span.set_tag_str(http.ENDPOINT, endpoint)

ddtrace/_trace/tracer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing import cast
1717

1818
from ddtrace._trace.context import Context
19+
from ddtrace._trace.processor import ResourceRenamingProcessor
1920
from ddtrace._trace.processor import SpanAggregator
2021
from ddtrace._trace.processor import SpanProcessor
2122
from ddtrace._trace.processor import TopLevelSpanProcessor
@@ -84,6 +85,9 @@ def _default_span_processors_factory(
8485
SpanStatsProcessorV06(),
8586
)
8687

88+
if config._trace_resource_renaming_enabled:
89+
span_processors.append(ResourceRenamingProcessor())
90+
8791
span_processors.append(profiling_span_processor)
8892

8993
return span_processors

ddtrace/ext/http.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
CLIENT_IP = "http.client_ip"
1919
ROUTE = "http.route"
2020
REFERRER_HOSTNAME = "http.referrer_hostname"
21+
ENDPOINT = "http.endpoint"
2122

2223
# HTTP headers
2324
REFERER_HEADER = "referer"

ddtrace/internal/processor/stats.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def _is_measured(span: Span) -> bool:
4848
str, # type
4949
int, # http status code
5050
bool, # synthetics request
51+
str, # http method
52+
str, # http endpoint
5153
]
5254

5355

@@ -73,8 +75,10 @@ def _span_aggr_key(span: Span) -> SpanAggrKey:
7375
resource = span.resource or ""
7476
_type = span.span_type or ""
7577
status_code = span.get_tag("http.status_code") or 0
78+
method = span.get_tag("http.method") or ""
79+
endpoint = span.get_tag("http.endpoint") or ""
7680
synthetics = span.context.dd_origin == "synthetics"
77-
return span.name, service, resource, _type, int(status_code), synthetics
81+
return (span.name, service, resource, _type, int(status_code), synthetics, method, endpoint)
7882

7983

8084
class SpanStatsProcessorV06(PeriodicService, SpanProcessor):
@@ -157,12 +161,14 @@ def _serialize_buckets(self) -> List[Dict]:
157161
serialized_bucket_keys.append(bucket_time_ns)
158162

159163
for aggr_key, stat_aggr in bucket.items():
160-
name, service, resource, _type, http_status, synthetics = aggr_key
164+
name, service, resource, _type, http_status, synthetics, http_method, http_endpoint = aggr_key
161165
serialized_bucket = {
162166
"Name": compat.ensure_text(name),
163167
"Resource": compat.ensure_text(resource),
164168
"Synthetics": synthetics,
165169
"HTTPStatusCode": http_status,
170+
"HTTPMethod": http_method,
171+
"HTTPEndpoint": http_endpoint,
166172
"Hits": stat_aggr.hits,
167173
"TopLevelHits": stat_aggr.top_level_hits,
168174
"Duration": stat_aggr.duration,

ddtrace/settings/_config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,14 @@ def __init__(self):
661661
self._inferred_proxy_services_enabled = _get_config("DD_TRACE_INFERRED_PROXY_SERVICES_ENABLED", False, asbool)
662662
self._trace_safe_instrumentation_enabled = _get_config("DD_TRACE_SAFE_INSTRUMENTATION_ENABLED", False, asbool)
663663

664+
# Resource renaming
665+
self._trace_resource_renaming_enabled = _get_config(
666+
"DD_TRACE_RESOURCE_RENAMING", default=False, modifier=asbool
667+
)
668+
self._trace_resource_renaming_always_simplified_endpoint = _get_config(
669+
"DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT", default=False, modifier=asbool
670+
)
671+
664672
def __getattr__(self, name) -> Any:
665673
if name in self._config:
666674
return self._config[name].value()

0 commit comments

Comments
 (0)