From 1e91a8dc8428d01ae0c7137fc92c23e5da523609 Mon Sep 17 00:00:00 2001 From: Radhika Gupta Date: Fri, 30 May 2025 12:28:14 -0700 Subject: [PATCH] Updated the instrumentation with aiohttp-server tests for url redaction --- CHANGELOG.md | 2 + .../aiohttp_client/__init__.py | 6 +- .../tests/test_aiohttp_client_integration.py | 8 +-- .../aiohttp_server/__init__.py | 15 +++- .../tests/test_aiohttp_server_integration.py | 38 ++++++++++ .../instrumentation/asgi/__init__.py | 4 +- .../tests/test_asgi_middleware.py | 5 +- .../instrumentation/httpx/__init__.py | 4 +- .../tests/test_httpx_integration.py | 12 ++-- .../instrumentation/requests/__init__.py | 4 +- .../tests/test_requests_integration.py | 6 +- .../instrumentation/tornado/client.py | 6 +- .../tests/test_instrumentation.py | 8 +-- .../instrumentation/urllib/__init__.py | 4 +- .../tests/test_urllib_integration.py | 4 +- .../instrumentation/wsgi/__init__.py | 4 +- .../tests/test_wsgi_middleware.py | 5 +- .../src/opentelemetry/util/http/__init__.py | 72 +++++++++++++++---- .../tests/test_redact_query_parameters.py | 51 +++++++++++++ .../tests/test_redact_url.py | 39 ++++++++++ .../tests/test_remove_credentials.py | 14 ++-- 21 files changed, 253 insertions(+), 58 deletions(-) create mode 100644 util/opentelemetry-util-http/tests/test_redact_query_parameters.py create mode 100644 util/opentelemetry-util-http/tests/test_redact_url.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9be7b9b60b..7db5c89c24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `opentelemetry-instrumentation-aiohttp-client` Add support for HTTP metrics ([#3517](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3517)) +- `opentelemetry-util-http` Added support for redacting specific url query string values and url credentials in instrumentations + ([#3508](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3508)) - `opentelemetry-instrumentation-httpx` Add support for HTTP metrics ([#3513](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3513)) diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py index 7bcf9fa1b4..bee39d13b0 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py @@ -135,7 +135,7 @@ def response_hook(span: Span, params: typing.Union[ ) from opentelemetry.trace import Span, SpanKind, TracerProvider, get_tracer from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import redact_url, sanitize_method _UrlFilterT = typing.Optional[typing.Callable[[yarl.URL], str]] _RequestHookT = typing.Optional[ @@ -311,9 +311,9 @@ async def on_request_start( method = params.method request_span_name = _get_span_name(method) request_url = ( - remove_url_credentials(trace_config_ctx.url_filter(params.url)) + redact_url(trace_config_ctx.url_filter(params.url)) if callable(trace_config_ctx.url_filter) - else remove_url_credentials(str(params.url)) + else redact_url(str(params.url)) ) span_attributes = {} diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py b/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py index 042f4502bb..5e6300e5d8 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py @@ -762,16 +762,16 @@ async def do_request(url): ) self.memory_exporter.clear() - def test_credential_removal(self): + def test_remove_sensitive_params(self): trace_configs = [aiohttp_client.create_trace_config()] - app = HttpServerMock("test_credential_removal") + app = HttpServerMock("test_remove_sensitive_params") @app.route("/status/200") def index(): return "hello" - url = "http://username:password@localhost:5000/status/200" + url = "http://username:password@localhost:5000/status/200?Signature=secret" with app.run("localhost", 5000): with self.subTest(url=url): @@ -793,7 +793,7 @@ async def do_request(url): (StatusCode.UNSET, None), { HTTP_METHOD: "GET", - HTTP_URL: ("http://localhost:5000/status/200"), + HTTP_URL: ("http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED"), HTTP_STATUS_CODE: int(HTTPStatus.OK), }, ) diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py index 56dc0a4c25..bf88caab5f 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py @@ -72,7 +72,7 @@ async def hello(request): ) from opentelemetry.semconv.metrics import MetricInstruments from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import get_excluded_urls, remove_url_credentials +from opentelemetry.util.http import get_excluded_urls, redact_url _duration_attrs = [ HTTP_METHOD, @@ -148,6 +148,17 @@ def collect_request_attributes(request: web.Request) -> Dict: request.url.port, str(request.url), ) + + user_info = request.headers.get("Authorization") + if user_info and http_url and "@" not in http_url: + # If there are credentials in Authorization header but not in URL + # Add dummy credentials that will be redacted + parsed = urllib.parse.urlparse(http_url) + netloc_with_auth = f"username:password@{parsed.netloc}" + http_url = urllib.parse.urlunparse( + (parsed.scheme, netloc_with_auth, parsed.path, parsed.params, parsed.query, parsed.fragment) + ) + query_string = request.query_string if query_string and http_url: if isinstance(query_string, bytes): @@ -161,7 +172,7 @@ def collect_request_attributes(request: web.Request) -> Dict: HTTP_ROUTE: _get_view_func(request), HTTP_FLAVOR: f"{request.version.major}.{request.version.minor}", HTTP_TARGET: request.path, - HTTP_URL: remove_url_credentials(http_url), + HTTP_URL: redact_url(http_url), } http_method = request.method diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py b/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py index 2e0c2e7352..31ece8b4ca 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py @@ -152,3 +152,41 @@ async def test_suppress_instrumentation( await client.get("/test-path") assert len(memory_exporter.get_finished_spans()) == 0 + +@pytest.mark.asyncio +async def test_remove_sensitive_params(tracer, aiohttp_server): + """Test that sensitive information in URLs is properly redacted.""" + _, memory_exporter = tracer + + # Set up instrumentation + AioHttpServerInstrumentor().instrument() + + # Create app with test route + app = aiohttp.web.Application() + async def handler(request): + return aiohttp.web.Response(text="hello") + + app.router.add_get('/status/200', handler) + + # Start the server + server = await aiohttp_server(app) + + # Make request with sensitive data in URL + url = f"http://username:password@{server.host}:{server.port}/status/200?Signature=secret" + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + assert response.status == 200 + assert await response.text() == "hello" + + # Verify redaction in span attributes + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.attributes[HTTP_METHOD] == "GET" + assert span.attributes[HTTP_STATUS_CODE] == 200 + assert span.attributes[HTTP_URL] == f"http://REDACTED:REDACTED@{server.host}:{server.port}/status/200?Signature=REDACTED" + + # Clean up + AioHttpServerInstrumentor().uninstrument() + memory_exporter.clear() \ No newline at end of file diff --git a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py index 8bf9c71aed..57b1b2502b 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py @@ -259,7 +259,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A get_custom_headers, normalise_request_header_name, normalise_response_header_name, - remove_url_credentials, + redact_url, sanitize_method, ) @@ -356,7 +356,7 @@ def collect_request_attributes( if _report_old(sem_conv_opt_in_mode): _set_http_url( result, - remove_url_credentials(http_url), + redact_url(http_url), _StabilityMode.DEFAULT, ) http_method = scope.get("method", "") diff --git a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py index 95a874bcdb..ce82f1ff57 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py @@ -1809,12 +1809,13 @@ def test_response_attributes_invalid_status_code(self): otel_asgi.set_status_code(self.span, "Invalid Status Code") self.assertEqual(self.span.set_status.call_count, 1) - def test_credential_removal(self): + def test_remove_sensitive_params(self): self.scope["server"] = ("username:password@mock", 80) self.scope["path"] = "/status/200" + self.scope["query_string"] = b"X-Goog-Signature=1234567890" attrs = otel_asgi.collect_request_attributes(self.scope) self.assertEqual( - attrs[SpanAttributes.HTTP_URL], "http://mock/status/200" + attrs[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?X-Goog-Signature=REDACTED" ) def test_collect_target_attribute_missing(self): diff --git a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py index 8c0b712594..f2c29f3b42 100644 --- a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py @@ -259,7 +259,7 @@ async def async_response_hook(span, request, response): from opentelemetry.trace import SpanKind, Tracer, TracerProvider, get_tracer from opentelemetry.trace.span import Span from opentelemetry.trace.status import StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import redact_url, sanitize_method _logger = logging.getLogger(__name__) @@ -313,7 +313,7 @@ def _extract_parameters( # In httpx >= 0.20.0, handle_request receives a Request object request: httpx.Request = args[0] method = request.method.encode() - url = httpx.URL(remove_url_credentials(str(request.url))) + url = httpx.URL(redact_url(str(request.url))) headers = request.headers stream = request.stream extensions = request.extensions diff --git a/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py b/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py index e9dcc377b5..f10f2a9e80 100644 --- a/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py +++ b/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py @@ -1301,12 +1301,12 @@ def test_basic(self): self.assert_span(num_spans=1) self.assert_metrics(num_metrics=1) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = "http://username:password@mock/status/200?sig=secret" self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED") class TestAsyncIntegration(BaseTestCases.BaseManualTest): @@ -1373,12 +1373,12 @@ def test_basic_multiple(self): self.assert_span(num_spans=2) self.assert_metrics(num_metrics=1) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = "http://username:password@mock/status/200?Signature=secret" self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?Signature=REDACTED") class TestSyncInstrumentationIntegration(BaseTestCases.BaseInstrumentorTest): diff --git a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py index 9f76b717c9..7cfc3a4fee 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py @@ -147,7 +147,7 @@ def response_hook(span, request_obj, response): ExcludeList, get_excluded_urls, parse_excluded_urls, - remove_url_credentials, + redact_url, sanitize_method, ) from opentelemetry.util.http.httplib import set_ip_on_next_http_connection @@ -232,7 +232,7 @@ def get_or_create_headers(): method = request.method span_name = get_default_span_name(method) - url = remove_url_credentials(request.url) + url = redact_url(request.url) span_attributes = {} _set_http_method( diff --git a/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py b/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py index 4850915494..93f6b33546 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py +++ b/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py @@ -686,12 +686,12 @@ def perform_request(url: str, session: requests.Session = None): return requests.get(url, timeout=5) return session.get(url) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = "http://username:password@mock/status/200?AWSAccessKeyId=secret" self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[HTTP_URL], self.URL) + self.assertEqual(span.attributes[HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?AWSAccessKeyId=REDACTED") def test_if_headers_equals_none(self): result = requests.get(self.URL, headers=None, timeout=5) diff --git a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py index fa0e53bf95..05cf7390e6 100644 --- a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py +++ b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py @@ -22,7 +22,7 @@ from opentelemetry.propagate import inject from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials +from opentelemetry.util.http import redact_url def _normalize_request(args, kwargs): @@ -75,7 +75,7 @@ def fetch_async( if span.is_recording(): attributes = { - SpanAttributes.HTTP_URL: remove_url_credentials(request.url), + SpanAttributes.HTTP_URL: redact_url(request.url), SpanAttributes.HTTP_METHOD: request.method, } for key, value in attributes.items(): @@ -161,7 +161,7 @@ def _finish_tracing_callback( def _create_metric_attributes(response): metric_attributes = { SpanAttributes.HTTP_STATUS_CODE: response.code, - SpanAttributes.HTTP_URL: remove_url_credentials(response.request.url), + SpanAttributes.HTTP_URL: redact_url(response.request.url), SpanAttributes.HTTP_METHOD: response.request.method, } diff --git a/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py b/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py index daf2ddd846..03de4361aa 100644 --- a/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py +++ b/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py @@ -496,8 +496,8 @@ def test_response_headers(self): set_global_response_propagator(orig) - def test_credential_removal(self): - app = HttpServerMock("test_credential_removal") + def test_remove_sensitive_params(self): + app = HttpServerMock("test_remove_sensitive_params") @app.route("/status/200") def index(): @@ -505,7 +505,7 @@ def index(): with app.run("localhost", 5000): response = self.fetch( - "http://username:password@localhost:5000/status/200" + "http://username:password@localhost:5000/status/200?Signature=secret" ) self.assertEqual(response.code, 200) @@ -518,7 +518,7 @@ def index(): self.assertSpanHasAttributes( client, { - SpanAttributes.HTTP_URL: "http://localhost:5000/status/200", + SpanAttributes.HTTP_URL: "http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED", SpanAttributes.HTTP_METHOD: "GET", SpanAttributes.HTTP_STATUS_CODE: 200, }, diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index 6da278fdc9..6ed83338a0 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -136,7 +136,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): ExcludeList, get_excluded_urls, parse_excluded_urls, - remove_url_credentials, + redact_url, sanitize_method, ) from opentelemetry.util.types import Attributes @@ -258,7 +258,7 @@ def _instrumented_open_call( span_name = _get_span_name(method) - url = remove_url_credentials(url) + url = redact_url(url) data = getattr(request, "data", None) request_size = 0 if data is None else len(data) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index b085cc50d5..6777feb292 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -512,14 +512,14 @@ def test_requests_timeout_exception(self, *_, **__): span = self.assert_span() self.assertEqual(span.status.status_code, StatusCode.ERROR) - def test_credential_removal(self): + def test_remove_sensitive_params(self): url = "http://username:password@mock/status/200" with self.assertRaises(Exception): self.perform_request(url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200") def test_hooks(self): def request_hook(span, request_obj): diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py index f55b3915f5..0d83891b30 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py @@ -273,7 +273,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he get_custom_headers, normalise_request_header_name, normalise_response_header_name, - remove_url_credentials, + redact_url, sanitize_method, ) @@ -370,7 +370,7 @@ def collect_request_attributes( else: # old semconv v1.20.0 if _report_old(sem_conv_opt_in_mode): - result[HTTP_URL] = remove_url_credentials( + result[HTTP_URL] = redact_url( wsgiref_util.request_uri(environ) ) diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py index 0cf7d06a0e..5a6e2d21f7 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py @@ -818,11 +818,12 @@ def test_response_attributes_noop(self): self.assertEqual(mock_span.is_recording.call_count, 2) self.assertEqual(attrs[HTTP_STATUS_CODE], 404) - def test_credential_removal(self): + def test_remove_sensitive_params(self): self.environ["HTTP_HOST"] = "username:password@mock" self.environ["PATH_INFO"] = "/status/200" + self.environ["QUERY_STRING"] = "sig=secret" expected = { - HTTP_URL: "http://mock/status/200", + HTTP_URL: "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED", NET_HOST_PORT: 80, } self.assertGreaterEqual( diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py index 71a6403a7d..42a1ad7f75 100644 --- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py +++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py @@ -58,6 +58,7 @@ SpanAttributes.HTTP_SERVER_NAME, } +PARAMS_TO_REDACT = ["AWSAccessKeyId", "Signature", "sig", "X-Goog-Signature"] class ExcludeList: """Class to exclude certain paths (given as a list of regexes) from tracing requests""" @@ -159,23 +160,23 @@ def parse_excluded_urls(excluded_urls: str) -> ExcludeList: def remove_url_credentials(url: str) -> str: - """Given a string url, remove the username and password only if it is a valid url""" - + """ Given a string url, replace the username and password with the keyword "REDACTED "only if it is a valid url""" try: parsed = urlparse(url) if all([parsed.scheme, parsed.netloc]): # checks for valid url - parsed_url = urlparse(url) - _, _, netloc = parsed.netloc.rpartition("@") - return urlunparse( - ( - parsed_url.scheme, - netloc, - parsed_url.path, - parsed_url.params, - parsed_url.query, - parsed_url.fragment, + if '@' in parsed.netloc: + _, _, host = parsed.netloc.rpartition("@") + new_netloc = "REDACTED:REDACTED@" + host + return urlunparse( + ( + parsed.scheme, + new_netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) ) - ) except ValueError: # an unparsable url was passed pass return url @@ -255,3 +256,48 @@ def _parse_url_query(url: str): path = parsed_url.path query_params = parsed_url.query return path, query_params + +def redact_query_parameters(url: str) -> str: + """Given a string url, redact sensitive query parameter values""" + try: + parsed = urlparse(url) + if not parsed.query: # No query parameters to redact + return url + + # Check if any of the sensitive parameters are in the query + has_sensitive_params = any(param + "=" in parsed.query for param in PARAMS_TO_REDACT) + if not has_sensitive_params: + return url + + # Process query parameters + query_parts: list[str] = [] + for query_part in parsed.query.split("&"): + if "=" in query_part: + param_name, _ = query_part.split("=", 1) # Parameter name and value + if param_name in PARAMS_TO_REDACT: + query_parts.append(f"{param_name}=REDACTED") + else: + query_parts.append(query_part) + else: + query_parts.append(query_part) # Handle params with no value + + # Reconstruct the URL with redacted query parameters + redacted_query = "&".join(query_parts) + return urlunparse( + ( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + redacted_query, + parsed.fragment, + ) + ) + except ValueError: # an unparsable url was passed + return url + +def redact_url(url: str) -> str: + """Redact sensitive data from the URL, including credentials and query parameters.""" + url = remove_url_credentials(url) + url = redact_query_parameters(url) + return url \ No newline at end of file diff --git a/util/opentelemetry-util-http/tests/test_redact_query_parameters.py b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py new file mode 100644 index 0000000000..ae0a873a62 --- /dev/null +++ b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py @@ -0,0 +1,51 @@ +import unittest +from opentelemetry.util.http import redact_query_parameters + +class TestRedactSensitiveInfo(unittest.TestCase): + def test_redact_goog_signature(self): + url = "https://www.example.com/path?color=blue&X-Goog-Signature=secret" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&X-Goog-Signature=REDACTED") + + def test_no_redaction_needed(self): + url = "https://www.example.com/path?color=blue&query=secret" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&query=secret") + + def test_no_query_parameters(self): + url = "https://www.example.com/path" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path") + + def test_empty_query_string(self): + url = "https://www.example.com/path?" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?") + + def test_empty_url(self): + url = "" + self.assertEqual(redact_query_parameters(url), "") + + def test_redact_aws_access_key_id(self): + url = "https://www.example.com/path?color=blue&AWSAccessKeyId=secrets" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&AWSAccessKeyId=REDACTED") + + def test_api_key_not_in_redact_list(self): + url = "https://www.example.com/path?api_key=secret%20key&user=john" + self.assertNotEqual(redact_query_parameters(url), "https://www.example.com/path?api_key=REDACTED&user=john") + + def test_password_key_not_in_redact_list(self): + url = "https://api.example.com?key=abc&password=123&user=admin" + self.assertNotEqual(redact_query_parameters(url), "https://api.example.com?key=REDACTED&password=REDACTED&user=admin") + + def test_url_with_at_symbol_in_path_and_query(self): + url = "https://github.com/p@th?foo=b@r" + self.assertEqual(redact_query_parameters(url), "https://github.com/p@th?foo=b@r") + + def test_aws_access_key_with_real_format(self): + url = "https://microsoft.com?AWSAccessKeyId=AKIAIOSFODNN7" + self.assertEqual(redact_query_parameters(url), "https://microsoft.com?AWSAccessKeyId=REDACTED") + + def test_signature_parameter(self): + url = "https://service.com?sig=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0" + self.assertEqual(redact_query_parameters(url), "https://service.com?sig=REDACTED") + + def test_signature_with_url_encoding(self): + url = "https://service.com?Signature=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0%3A377" + self.assertEqual(redact_query_parameters(url), "https://service.com?Signature=REDACTED") \ No newline at end of file diff --git a/util/opentelemetry-util-http/tests/test_redact_url.py b/util/opentelemetry-util-http/tests/test_redact_url.py new file mode 100644 index 0000000000..618174b973 --- /dev/null +++ b/util/opentelemetry-util-http/tests/test_redact_url.py @@ -0,0 +1,39 @@ +import unittest +from opentelemetry.util.http import redact_url, PARAMS_TO_REDACT + +class TestRedactUrl(unittest.TestCase): + def test_redact_both_credentials_and_query_params(self): + """Test URL with both credentials and sensitive query parameters.""" + url = "https://user:password@api.example.com/data?AWSAccessKeyId=AKIAIOSFODNN7&color=blue" + expected = "https://REDACTED:REDACTED@api.example.com/data?AWSAccessKeyId=REDACTED&color=blue" + self.assertEqual(redact_url(url), expected) + + def test_multiple_sensitive_query_params(self): + """Test URL with multiple sensitive query parameters.""" + url = "https://admin:1234@example.com/secure?Signature=abc123&X-Goog-Signature=xyz789&sig=def456" + expected = "https://REDACTED:REDACTED@example.com/secure?Signature=REDACTED&X-Goog-Signature=REDACTED&sig=REDACTED" + self.assertEqual(redact_url(url), expected) + + def test_url_with_special_characters(self): + """Test URL with special characters in both credentials and query parameters.""" + url = "https://user@domain:p@ss!word@api.example.com/path?Signature=s%40me+special%20chars&normal=fine" + expected = "https://REDACTED:REDACTED@api.example.com/path?Signature=REDACTED&normal=fine" + self.assertEqual(redact_url(url), expected) + + def test_edge_cases(self): + """Test unusual URL formats and corner cases.""" + # URL with fragment + url1 = "https://user:pass@api.example.com/data?Signature=secret#section" + self.assertEqual(redact_url(url1), "https://REDACTED:REDACTED@api.example.com/data?Signature=REDACTED#section") + + # URL with port number + url2 = "https://user:pass@api.example.com:8443/data?AWSAccessKeyId=secret" + self.assertEqual(redact_url(url2), "https://REDACTED:REDACTED@api.example.com:8443/data?AWSAccessKeyId=REDACTED") + + # URL with IP address instead of domain + url3 = "https://user:pass@192.168.1.1/path?X-Goog-Signature=xyz" + self.assertEqual(redact_url(url3), "https://REDACTED:REDACTED@192.168.1.1/path?X-Goog-Signature=REDACTED") + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/util/opentelemetry-util-http/tests/test_remove_credentials.py b/util/opentelemetry-util-http/tests/test_remove_credentials.py index 2e50f1495f..c874ab820d 100644 --- a/util/opentelemetry-util-http/tests/test_remove_credentials.py +++ b/util/opentelemetry-util-http/tests/test_remove_credentials.py @@ -10,22 +10,28 @@ def test_remove_no_credentials(self): self.assertEqual(cleaned_url, url) def test_remove_credentials(self): - url = "http://someuser:somepass@opentelemetry.io:8080/test/path?query=value" + url = "http://someuser:somepass@opentelemetry.io:8080/test/path?sig=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://opentelemetry.io:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@opentelemetry.io:8080/test/path?sig=value" ) def test_remove_credentials_ipv4_literal(self): url = "http://someuser:somepass@127.0.0.1:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://127.0.0.1:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@127.0.0.1:8080/test/path?query=value" ) def test_remove_credentials_ipv6_literal(self): url = "http://someuser:somepass@[::1]:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://[::1]:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@[::1]:8080/test/path?query=value" ) + + def test_empty_url(self): + url = "" + cleaned_url = remove_url_credentials(url) + self.assertEqual(cleaned_url, url) + \ No newline at end of file