Skip to content

Redact specific url query string values and url credentials in instrumentations #3508

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- `opentelemetry-instrumentation-aiohttp-client` Add support for HTTP metrics
([#3517](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3517))
- `opentelemetry-util-http` Add support for redacting specific URL query string values and URL credentials in instrumentations
([#3508](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3508))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def response_hook(span: Span, params: typing.Union[
)
from opentelemetry.trace import Span, SpanKind, TracerProvider, get_tracer
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import remove_url_credentials, sanitize_method
from opentelemetry.util.http import redact_url, sanitize_method

_UrlFilterT = typing.Optional[typing.Callable[[yarl.URL], str]]
_RequestHookT = typing.Optional[
Expand Down Expand Up @@ -311,9 +311,9 @@ async def on_request_start(
method = params.method
request_span_name = _get_span_name(method)
request_url = (
remove_url_credentials(trace_config_ctx.url_filter(params.url))
redact_url(trace_config_ctx.url_filter(params.url))
if callable(trace_config_ctx.url_filter)
else remove_url_credentials(str(params.url))
else redact_url(str(params.url))
)

span_attributes = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -762,16 +762,16 @@ async def do_request(url):
)
self.memory_exporter.clear()

def test_credential_removal(self):
def test_remove_sensitive_params(self):
trace_configs = [aiohttp_client.create_trace_config()]

app = HttpServerMock("test_credential_removal")
app = HttpServerMock("test_remove_sensitive_params")

@app.route("/status/200")
def index():
return "hello"

url = "http://username:password@localhost:5000/status/200"
url = "http://username:password@localhost:5000/status/200?Signature=secret"

with app.run("localhost", 5000):
with self.subTest(url=url):
Expand All @@ -793,7 +793,7 @@ async def do_request(url):
(StatusCode.UNSET, None),
{
HTTP_METHOD: "GET",
HTTP_URL: ("http://localhost:5000/status/200"),
HTTP_URL: ("http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED"),
HTTP_STATUS_CODE: int(HTTPStatus.OK),
},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async def hello(request):
)
from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import get_excluded_urls, remove_url_credentials
from opentelemetry.util.http import get_excluded_urls, redact_url

_duration_attrs = [
HTTP_METHOD,
Expand Down Expand Up @@ -148,6 +148,17 @@ def collect_request_attributes(request: web.Request) -> Dict:
request.url.port,
str(request.url),
)

user_info = request.headers.get("Authorization")
if user_info and http_url and "@" not in http_url:
# If there are credentials in Authorization header but not in URL
# Add dummy credentials that will be redacted
parsed = urllib.parse.urlparse(http_url)
netloc_with_auth = f"username:password@{parsed.netloc}"
http_url = urllib.parse.urlunparse(
(parsed.scheme, netloc_with_auth, parsed.path, parsed.params, parsed.query, parsed.fragment)
)

query_string = request.query_string
if query_string and http_url:
if isinstance(query_string, bytes):
Expand All @@ -161,7 +172,7 @@ def collect_request_attributes(request: web.Request) -> Dict:
HTTP_ROUTE: _get_view_func(request),
HTTP_FLAVOR: f"{request.version.major}.{request.version.minor}",
HTTP_TARGET: request.path,
HTTP_URL: remove_url_credentials(http_url),
HTTP_URL: redact_url(http_url),
}

http_method = request.method
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,41 @@ async def test_suppress_instrumentation(
await client.get("/test-path")

assert len(memory_exporter.get_finished_spans()) == 0

@pytest.mark.asyncio
async def test_remove_sensitive_params(tracer, aiohttp_server):
    """Test that sensitive information in URLs is properly redacted.

    Sends a GET request whose URL carries ``username:password`` userinfo
    and a ``Signature`` query parameter, then checks that the ``HTTP_URL``
    attribute of the single exported span has those values replaced with
    ``REDACTED``.
    """
    _, memory_exporter = tracer

    # Set up instrumentation
    AioHttpServerInstrumentor().instrument()
    try:
        # Create app with test route
        app = aiohttp.web.Application()

        async def handler(request):
            return aiohttp.web.Response(text="hello")

        app.router.add_get("/status/200", handler)

        # Start the server
        server = await aiohttp_server(app)

        # Make request with sensitive data in URL
        url = f"http://username:password@{server.host}:{server.port}/status/200?Signature=secret"
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                assert response.status == 200
                assert await response.text() == "hello"

        # Verify redaction in span attributes
        spans = memory_exporter.get_finished_spans()
        assert len(spans) == 1

        span = spans[0]
        assert span.attributes[HTTP_METHOD] == "GET"
        assert span.attributes[HTTP_STATUS_CODE] == 200
        assert span.attributes[HTTP_URL] == f"http://REDACTED:REDACTED@{server.host}:{server.port}/status/200?Signature=REDACTED"
    finally:
        # Clean up even when an assertion above fails, so the instrumentation
        # and any exported spans do not leak into subsequent tests.
        AioHttpServerInstrumentor().uninstrument()
        memory_exporter.clear()
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A
get_custom_headers,
normalise_request_header_name,
normalise_response_header_name,
remove_url_credentials,
redact_url,
sanitize_method,
)

Expand Down Expand Up @@ -356,7 +356,7 @@ def collect_request_attributes(
if _report_old(sem_conv_opt_in_mode):
_set_http_url(
result,
remove_url_credentials(http_url),
redact_url(http_url),
_StabilityMode.DEFAULT,
)
http_method = scope.get("method", "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1809,12 +1809,13 @@ def test_response_attributes_invalid_status_code(self):
otel_asgi.set_status_code(self.span, "Invalid Status Code")
self.assertEqual(self.span.set_status.call_count, 1)

def test_credential_removal(self):
def test_remove_sensitive_params(self):
self.scope["server"] = ("username:password@mock", 80)
self.scope["path"] = "/status/200"
self.scope["query_string"] = b"X-Goog-Signature=1234567890"
attrs = otel_asgi.collect_request_attributes(self.scope)
self.assertEqual(
attrs[SpanAttributes.HTTP_URL], "http://mock/status/200"
attrs[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?X-Goog-Signature=REDACTED"
)

def test_collect_target_attribute_missing(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ async def async_response_hook(span, request, response):
from opentelemetry.trace import SpanKind, Tracer, TracerProvider, get_tracer
from opentelemetry.trace.span import Span
from opentelemetry.trace.status import StatusCode
from opentelemetry.util.http import remove_url_credentials, sanitize_method
from opentelemetry.util.http import redact_url, sanitize_method

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -298,7 +298,7 @@ def _extract_parameters(
# In httpx >= 0.20.0, handle_request receives a Request object
request: httpx.Request = args[0]
method = request.method.encode()
url = httpx.URL(remove_url_credentials(str(request.url)))
url = httpx.URL(redact_url(str(request.url)))
headers = request.headers
stream = request.stream
extensions = request.extensions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1127,12 +1127,12 @@ def perform_request(
return self.client.request(method, url, headers=headers)
return client.request(method, url, headers=headers)

def test_credential_removal(self):
new_url = "http://username:password@mock/status/200"
def test_remove_sensitive_params(self):
new_url = "http://username:password@mock/status/200?sig=secret"
self.perform_request(new_url)
span = self.assert_span()

self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL)
self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED")


class TestAsyncIntegration(BaseTestCases.BaseManualTest):
Expand Down Expand Up @@ -1196,12 +1196,12 @@ def test_basic_multiple(self):
)
self.assert_span(num_spans=2)

def test_credential_removal(self):
new_url = "http://username:password@mock/status/200"
def test_remove_sensitive_params(self):
new_url = "http://username:password@mock/status/200?Signature=secret"
self.perform_request(new_url)
span = self.assert_span()

self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL)
self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?Signature=REDACTED")


class TestSyncInstrumentationIntegration(BaseTestCases.BaseInstrumentorTest):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def response_hook(span, request_obj, response):
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
remove_url_credentials,
redact_url,
sanitize_method,
)
from opentelemetry.util.http.httplib import set_ip_on_next_http_connection
Expand Down Expand Up @@ -232,7 +232,7 @@ def get_or_create_headers():
method = request.method
span_name = get_default_span_name(method)

url = remove_url_credentials(request.url)
url = redact_url(request.url)

span_attributes = {}
_set_http_method(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -686,12 +686,12 @@ def perform_request(url: str, session: requests.Session = None):
return requests.get(url, timeout=5)
return session.get(url)

def test_credential_removal(self):
new_url = "http://username:password@mock/status/200"
def test_remove_sensitive_params(self):
new_url = "http://username:password@mock/status/200?AWSAccessKeyId=secret"
self.perform_request(new_url)
span = self.assert_span()

self.assertEqual(span.attributes[HTTP_URL], self.URL)
self.assertEqual(span.attributes[HTTP_URL], "http://REDACTED:REDACTED@mock/status/200?AWSAccessKeyId=REDACTED")

def test_if_headers_equals_none(self):
result = requests.get(self.URL, headers=None, timeout=5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from opentelemetry.propagate import inject
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import remove_url_credentials
from opentelemetry.util.http import redact_url


def _normalize_request(args, kwargs):
Expand Down Expand Up @@ -75,7 +75,7 @@ def fetch_async(

if span.is_recording():
attributes = {
SpanAttributes.HTTP_URL: remove_url_credentials(request.url),
SpanAttributes.HTTP_URL: redact_url(request.url),
SpanAttributes.HTTP_METHOD: request.method,
}
for key, value in attributes.items():
Expand Down Expand Up @@ -161,7 +161,7 @@ def _finish_tracing_callback(
def _create_metric_attributes(response):
metric_attributes = {
SpanAttributes.HTTP_STATUS_CODE: response.code,
SpanAttributes.HTTP_URL: remove_url_credentials(response.request.url),
SpanAttributes.HTTP_URL: redact_url(response.request.url),
SpanAttributes.HTTP_METHOD: response.request.method,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,16 +496,16 @@ def test_response_headers(self):

set_global_response_propagator(orig)

def test_credential_removal(self):
app = HttpServerMock("test_credential_removal")
def test_remove_sensitive_params(self):
app = HttpServerMock("test_remove_sensitive_params")

@app.route("/status/200")
def index():
return "hello"

with app.run("localhost", 5000):
response = self.fetch(
"http://username:password@localhost:5000/status/200"
"http://username:password@localhost:5000/status/200?Signature=secret"
)
self.assertEqual(response.code, 200)

Expand All @@ -518,7 +518,7 @@ def index():
self.assertSpanHasAttributes(
client,
{
SpanAttributes.HTTP_URL: "http://localhost:5000/status/200",
SpanAttributes.HTTP_URL: "http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED",
SpanAttributes.HTTP_METHOD: "GET",
SpanAttributes.HTTP_STATUS_CODE: 200,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
ExcludeList,
get_excluded_urls,
parse_excluded_urls,
remove_url_credentials,
redact_url,
sanitize_method,
)
from opentelemetry.util.types import Attributes
Expand Down Expand Up @@ -258,7 +258,7 @@ def _instrumented_open_call(

span_name = _get_span_name(method)

url = remove_url_credentials(url)
url = redact_url(url)

data = getattr(request, "data", None)
request_size = 0 if data is None else len(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -512,14 +512,14 @@ def test_requests_timeout_exception(self, *_, **__):
span = self.assert_span()
self.assertEqual(span.status.status_code, StatusCode.ERROR)

def test_credential_removal(self):
def test_remove_sensitive_params(self):
url = "http://username:password@mock/status/200"

with self.assertRaises(Exception):
self.perform_request(url)

span = self.assert_span()
self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL)
self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], "http://REDACTED:REDACTED@mock/status/200")

def test_hooks(self):
def request_hook(span, request_obj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
get_custom_headers,
normalise_request_header_name,
normalise_response_header_name,
remove_url_credentials,
redact_url,
sanitize_method,
)

Expand Down Expand Up @@ -370,7 +370,7 @@ def collect_request_attributes(
else:
# old semconv v1.20.0
if _report_old(sem_conv_opt_in_mode):
result[HTTP_URL] = remove_url_credentials(
result[HTTP_URL] = redact_url(
wsgiref_util.request_uri(environ)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -818,11 +818,12 @@ def test_response_attributes_noop(self):
self.assertEqual(mock_span.is_recording.call_count, 2)
self.assertEqual(attrs[HTTP_STATUS_CODE], 404)

def test_credential_removal(self):
def test_remove_sensitive_params(self):
self.environ["HTTP_HOST"] = "username:password@mock"
self.environ["PATH_INFO"] = "/status/200"
self.environ["QUERY_STRING"] = "sig=secret"
expected = {
HTTP_URL: "http://mock/status/200",
HTTP_URL: "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED",
NET_HOST_PORT: 80,
}
self.assertGreaterEqual(
Expand Down
Loading