Commit be6e5f4

Switch to a metric for total tokens used
1 parent 3f37dca commit be6e5f4

5 files changed: +14 lines, -37 lines

sentry_sdk/consts.py
Lines changed: 0 additions, 18 deletions

@@ -97,24 +97,6 @@ class SPANDATA:
     Example: [{"role": "user", "message": "hello"}]
     """
 
-    AI_COMPLETION_TOKENS_USED = "ai.completion_tokens.used"
-    """
-    The number of tokens used to respond to an AI model request
-    Example: 10
-    """
-
-    AI_PROMPT_TOKENS_USED = "ai.prompt_tokens.used"
-    """
-    The number of tokens used to process the input text to an AI model request
-    Example: 20
-    """
-
-    AI_TOTAL_TOKENS_USED = "ai.total_tokens.used"
-    """
-    The number of tokens used in total to process an AI model request
-    Example: 30
-    """
-
     AI_MODEL_ID = "ai.model_id"
     """
     The unique descriptor of the model being execugted
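
For readers tracking the rename, a small reference mapping (not part of the commit) from the removed SPANDATA constant values to the measurement keys used in the diffs below; the left-hand strings come from the deleted lines above, the right-hand names from _ai_common.py:

# Removed span-data constant value -> new span measurement key
TOKEN_KEY_MIGRATION = {
    "ai.prompt_tokens.used": "ai_prompt_tokens_used",
    "ai.completion_tokens.used": "ai_completion_tokens_used",
    "ai.total_tokens.used": "ai_total_tokens_used",
}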

sentry_sdk/integrations/_ai_common.py
Lines changed: 3 additions, 10 deletions

@@ -1,6 +1,4 @@
-from sentry_sdk import metrics
 from sentry_sdk._types import TYPE_CHECKING
-from sentry_sdk.consts import SPANDATA
 
 if TYPE_CHECKING:
     from typing import Any, Optional
@@ -39,19 +37,14 @@ def record_token_usage(
 ):
     # type: (Span, Optional[int], Optional[int], Optional[int]) -> None
     if prompt_tokens is not None:
-        span.set_data(SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens)
-        metrics.incr(SPANDATA.AI_PROMPT_TOKENS_USED, value=prompt_tokens, unit="tokens")
+        span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens)
     if completion_tokens is not None:
-        span.set_data(SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens)
-        metrics.incr(
-            SPANDATA.AI_COMPLETION_TOKENS_USED, value=completion_tokens, unit="tokens"
-        )
+        span.set_measurement("ai_completion_tokens_used", value=completion_tokens)
     if (
         total_tokens is None
         and prompt_tokens is not None
         and completion_tokens is not None
     ):
         total_tokens = prompt_tokens + completion_tokens
     if total_tokens is not None:
-        span.set_data(SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens)
-        metrics.incr(SPANDATA.AI_TOTAL_TOKENS_USED, value=total_tokens, unit="tokens")
+        span.set_measurement("ai_total_tokens_used", total_tokens)

sentry_sdk/integrations/langchain.py
Lines changed: 1 addition, 0 deletions

@@ -136,6 +136,7 @@ def _create_span(self, run_id, parent_id, **kwargs):
         span = sentry_sdk.start_span(**kwargs)
 
         span.__enter__()
+        span.set_data("sdk.integration", "langchain")
         watched_span = WatchedSpan(span)
         self.span_map[run_id] = watched_span
         self.gc_span_map()

sentry_sdk/integrations/openai.py
Lines changed: 2 additions, 0 deletions

@@ -143,6 +143,7 @@ def new_chat_completion(*args, **kwargs):
             op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion"
         )
         span.__enter__()
+        span.set_data("sdk.integration", "openai")
         try:
             res = f(*args, **kwargs)
         except Exception as e:
@@ -225,6 +226,7 @@ def new_embeddings_create(*args, **kwargs):
             op=consts.OP.OPENAI_EMBEDDINGS_CREATE,
             description="OpenAI Embedding Creation",
         ) as span:
+            span.set_data("sdk.integration", "openai")
             integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
             if "input" in kwargs and (
                 should_send_default_pii() and integration.include_prompts
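
Both integrations now tag their spans with an "sdk.integration" data entry. As an aside (not part of the commit), a hedged sketch of one way that tag could be inspected, here in a before_send_transaction hook; the hook option is real sentry_sdk API, everything else is illustrative:

import sentry_sdk

def count_ai_spans(event, hint):
    # Transaction events carry child spans under "spans"; openai/langchain
    # spans now include "sdk.integration" in their "data" dict.
    counts = {}
    for span in event.get("spans", []):
        integration = (span.get("data") or {}).get("sdk.integration")
        if integration:
            counts[integration] = counts.get(integration, 0) + 1
    # e.g. {"openai": 3, "langchain": 1}; return the event unchanged.
    return event

sentry_sdk.init(traces_sample_rate=1.0, before_send_transaction=count_ai_spans)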

tests/integrations/openai/test_openai.py
Lines changed: 8 additions, 9 deletions

@@ -7,7 +7,6 @@
 from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
 
 from sentry_sdk import start_transaction
-from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations.openai import OpenAIIntegration
 
 from unittest import mock  # python 3.3 and above
@@ -74,9 +73,9 @@ def test_nonstreaming_chat_completion(
         assert "ai.input_messages" not in span["data"]
         assert "ai.responses" not in span["data"]
 
-    assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10
-    assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20
-    assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30
+    assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
+    assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
+    assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
 
 
 # noinspection PyTypeChecker
@@ -156,9 +155,9 @@ def test_streaming_chat_completion(
     try:
         import tiktoken  # type: ignore # noqa # pylint: disable=unused-import
 
-        assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 2
-        assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 1
-        assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 3
+        assert span["measurements"]["ai_completion_tokens_used"]["value"] == 2
+        assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 1
+        assert span["measurements"]["ai_total_tokens_used"]["value"] == 3
     except ImportError:
         pass  # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
 
@@ -223,5 +222,5 @@ def test_embeddings_create(
     else:
         assert "ai.input_messages" not in span["data"]
 
-    assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20
-    assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30
+    assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
+    assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
