|
2 | 2 | from collections import defaultdict
|
3 | 3 | from itertools import chain
|
4 | 4 | import logging
|
| 5 | +import re |
5 | 6 | from threading import RLock
|
6 | 7 | from typing import Any
|
7 | 8 | from typing import DefaultDict
|
|
15 | 16 | from ddtrace._trace.span import Span
|
16 | 17 | from ddtrace._trace.span import _get_64_highest_order_bits_as_hex
|
17 | 18 | from ddtrace.constants import _APM_ENABLED_METRIC_KEY as MK_APM_ENABLED
|
| 19 | +from ddtrace.ext import http |
18 | 20 | from ddtrace.internal import gitmetadata
|
19 | 21 | from ddtrace.internal import telemetry
|
20 | 22 | from ddtrace.internal.constants import COMPONENT
|
@@ -497,3 +499,68 @@ def reset(
|
497 | 499 | "spans_created": defaultdict(int),
|
498 | 500 | "spans_finished": defaultdict(int),
|
499 | 501 | }
|
| 502 | + |
| 503 | + |
class ResourceRenamingProcessor(SpanProcessor):
    """Span processor that sets the ``http.ENDPOINT`` tag on finished spans.

    When a span carries an ``http.ROUTE`` tag (and the
    ``config._trace_resource_renaming_always_simplified_endpoint`` flag is
    off) the route is reused as the endpoint. Otherwise the endpoint is
    derived from the ``http.URL`` tag: the path is extracted and every path
    segment that looks like a parameter (numbers, ids, hex strings, long or
    special-character strings) is replaced by a ``{param:<kind>}`` placeholder.
    """

    # Only this many leading non-empty path segments are kept in the endpoint.
    _MAX_PATH_ELEMENTS = 8

    def __init__(self):
        # Splits a URL into optional scheme+host, a mandatory path starting
        # with "/", and an optional query string.
        self._URL_PATH_EXTRACTION_RE = re.compile(
            r"^(?P<protocol>[a-z]+://(?P<host>[^?/]+))?(?P<path>/[^?]*)(?P<query>(\?).*)?$"
        )

        # Integer of at least two digits, without a leading zero.
        self._INT_RE = re.compile(r"^[1-9][0-9]+$")
        # The id/hex patterns require at least one digit (lookahead); without
        # it plain words such as "facade" or separator-only segments such as
        # "---" would be mistaken for identifiers.
        self._INT_ID_RE = re.compile(r"^(?=.*[0-9])[0-9._-]{3,}$")
        self._HEX_RE = re.compile(r"^(?=.*[0-9])[A-Fa-f0-9]{6,}$")
        self._HEX_ID_RE = re.compile(r"^(?=.*[0-9])[A-Fa-f0-9._-]{6,}$")
        # Any segment of 20+ characters, or containing one of %&'()*+,:=@
        self._STR_RE = re.compile(r"^(.{20,}|.*[%&'()*+,:=@].*)$")

    def _compute_simplified_endpoint_path_element(self, elem: str) -> str:
        """Apply the parameter replacement rules to a single path element.

        The rules are tried from most to least specific; the first matching
        rule wins. An element matching no rule is returned unchanged.
        """
        rules = (
            (self._INT_RE, "{param:int}"),
            (self._INT_ID_RE, "{param:int_id}"),
            (self._HEX_RE, "{param:hex}"),
            (self._HEX_ID_RE, "{param:hex_id}"),
            (self._STR_RE, "{param:str}"),
        )
        for pattern, placeholder in rules:
            if pattern.fullmatch(elem):
                return placeholder
        return elem

    def _compute_simplified_endpoint(self, url: Optional[str]) -> str:
        """Extract the path of ``url`` and replace parameter-like segments.

        Returns ``"/"`` for a missing/empty URL or an empty path, ``""`` when
        the URL does not match the extraction pattern, and otherwise the
        simplified path built from at most ``_MAX_PATH_ELEMENTS`` non-empty
        segments.
        """
        if not url:
            return "/"

        match = self._URL_PATH_EXTRACTION_RE.match(url)
        if not match:
            return ""

        path = match.group("path")
        if not path or path == "/":
            return "/"

        # Empty segments (from leading, trailing or doubled slashes) are
        # dropped before the limit is applied.
        segments = [part for part in path.split("/") if part][: self._MAX_PATH_ELEMENTS]
        if not segments:
            return "/"

        return "/" + "/".join(self._compute_simplified_endpoint_path_element(seg) for seg in segments)

    def on_span_start(self, span):
        """No-op: the endpoint is only computed when the span finishes."""
        pass

    def on_span_finish(self, span):
        """Set the ``http.ENDPOINT`` tag on ``span``.

        Uses the simplified URL path when the span has no ``http.ROUTE`` tag
        or when simplification is forced through
        ``config._trace_resource_renaming_always_simplified_endpoint``;
        otherwise reuses the route.
        """
        route = span.get_tag(http.ROUTE)

        if not route or config._trace_resource_renaming_always_simplified_endpoint:
            endpoint = self._compute_simplified_endpoint(span.get_tag(http.URL))
        else:
            # The route is already a templated path, so it is used as-is.
            # (Previously this branch read an unassigned ``endpoint`` local
            # and raised UnboundLocalError.)
            endpoint = route

        span.set_tag_str(http.ENDPOINT, endpoint)
0 commit comments