Skip to content

Commit e9738f6

Browse files
authored
fix(ai): add message truncation to litellm (#4973)
1 parent 3d026cd commit e9738f6

File tree

2 files changed

+70
-4
lines changed

2 files changed

+70
-4
lines changed

sentry_sdk/integrations/litellm.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,11 @@
33
import sentry_sdk
44
from sentry_sdk import consts
55
from sentry_sdk.ai.monitoring import record_token_usage
6-
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
6+
from sentry_sdk.ai.utils import (
7+
get_start_span_function,
8+
set_data_normalized,
9+
truncate_and_annotate_messages,
10+
)
711
from sentry_sdk.consts import SPANDATA
812
from sentry_sdk.integrations import DidNotEnable, Integration
913
from sentry_sdk.scope import should_send_default_pii
@@ -76,9 +80,12 @@ def _input_callback(kwargs):
7680
# Record messages if allowed
7781
messages = kwargs.get("messages", [])
7882
if messages and should_send_default_pii() and integration.include_prompts:
79-
set_data_normalized(
80-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
81-
)
83+
scope = sentry_sdk.get_current_scope()
84+
messages_data = truncate_and_annotate_messages(messages, span, scope)
85+
if messages_data is not None:
86+
set_data_normalized(
87+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
88+
)
8289

8390
# Record other parameters
8491
params = {

tests/integrations/litellm/test_litellm.py

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import json
12
import pytest
23
from unittest import mock
34
from datetime import datetime
@@ -546,3 +547,61 @@ def dict(self):
546547

547548
# Should have extracted the response message
548549
assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"]
550+
551+
552+
def test_litellm_message_truncation(sentry_init, capture_events):
553+
"""Test that large messages are truncated properly in LiteLLM integration."""
554+
sentry_init(
555+
integrations=[LiteLLMIntegration(include_prompts=True)],
556+
traces_sample_rate=1.0,
557+
send_default_pii=True,
558+
)
559+
events = capture_events()
560+
561+
large_content = (
562+
"This is a very long message that will exceed our size limits. " * 1000
563+
)
564+
messages = [
565+
{"role": "user", "content": "small message 1"},
566+
{"role": "assistant", "content": large_content},
567+
{"role": "user", "content": large_content},
568+
{"role": "assistant", "content": "small message 4"},
569+
{"role": "user", "content": "small message 5"},
570+
]
571+
mock_response = MockCompletionResponse()
572+
573+
with start_transaction(name="litellm test"):
574+
kwargs = {
575+
"model": "gpt-3.5-turbo",
576+
"messages": messages,
577+
}
578+
579+
_input_callback(kwargs)
580+
_success_callback(
581+
kwargs,
582+
mock_response,
583+
datetime.now(),
584+
datetime.now(),
585+
)
586+
587+
assert len(events) > 0
588+
tx = events[0]
589+
assert tx["type"] == "transaction"
590+
591+
chat_spans = [
592+
span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
593+
]
594+
assert len(chat_spans) > 0
595+
596+
chat_span = chat_spans[0]
597+
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
598+
599+
messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
600+
assert isinstance(messages_data, str)
601+
602+
parsed_messages = json.loads(messages_data)
603+
assert isinstance(parsed_messages, list)
604+
assert len(parsed_messages) == 2
605+
assert "small message 4" in str(parsed_messages[0])
606+
assert "small message 5" in str(parsed_messages[1])
607+
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5

0 commit comments

Comments (0)