diff --git a/sentry_sdk/ai/monitoring.py b/sentry_sdk/ai/monitoring.py index 3fccd65eca..5940fb5bc2 100644 --- a/sentry_sdk/ai/monitoring.py +++ b/sentry_sdk/ai/monitoring.py @@ -107,9 +107,9 @@ def record_token_usage( if ai_pipeline_name: span.set_attribute(SPANDATA.AI_PIPELINE_NAME, ai_pipeline_name) if prompt_tokens is not None: - span.set_attribute("ai.prompt_tokens.used", prompt_tokens) + span.set_attribute(SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) if completion_tokens is not None: - span.set_attribute("ai.completion_tokens.used", completion_tokens) + span.set_attribute(SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens) if ( total_tokens is None and prompt_tokens is not None @@ -117,4 +117,4 @@ def record_token_usage( ): total_tokens = prompt_tokens + completion_tokens if total_tokens is not None: - span.set_attribute("ai.total_tokens.used", total_tokens) + span.set_attribute(SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 9f925f89c7..f4d877b0bc 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -251,6 +251,24 @@ class SPANDATA: Example: "qa-pipeline" """ + AI_PROMPT_TOKENS_USED = "ai.prompt_tokens.used" + """ + The number of input prompt tokens used by the model. + Example: 10 + """ + + AI_COMPLETION_TOKENS_USED = "ai.completion_tokens.used" + """ + The number of output completion tokens used by the model. + Example: 10 + """ + + AI_TOTAL_TOKENS_USED = "ai.total_tokens.used" + """ + The total number of tokens (input + output) used by the request to the model. + Example: 20 + """ + AI_TEXTS = "ai.texts" """ Raw text inputs provided to the model. diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 407d2a6ac3..70f3f423fe 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -127,9 +127,9 @@ def test_nonstreaming_create_message( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.completion_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 assert span["data"][SPANDATA.AI_STREAMING] is False @@ -197,9 +197,9 @@ async def test_nonstreaming_create_message_async( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.completion_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 assert span["data"][SPANDATA.AI_STREAMING] is False @@ -299,9 +299,9 @@ def test_streaming_create_message( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.completion_tokens.used"] == 30 - assert span["data"]["ai.total_tokens.used"] == 40 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 30 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 40 assert span["data"][SPANDATA.AI_STREAMING] is True @@ -404,9 +404,9 @@ async def test_streaming_create_message_async( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.completion_tokens.used"] == 30 - assert span["data"]["ai.total_tokens.used"] == 40 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 30 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 40 assert span["data"][SPANDATA.AI_STREAMING] is True @@ -536,9 +536,9 @@ def test_streaming_create_message_with_input_json_delta( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 366 - assert span["data"]["ai.completion_tokens.used"] == 51 - assert span["data"]["ai.total_tokens.used"] == 417 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 366 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 51 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 417 assert span["data"][SPANDATA.AI_STREAMING] is True @@ -675,9 +675,9 @@ async def test_streaming_create_message_with_input_json_delta_async( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 366 - assert span["data"]["ai.completion_tokens.used"] == 51 - assert span["data"]["ai.total_tokens.used"] == 417 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 366 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 51 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 417 assert span["data"][SPANDATA.AI_STREAMING] is True @@ -831,6 +831,6 @@ def test_add_ai_data_to_span_with_input_json_delta(sentry_init, capture_events): [{"type": "text", "text": "{'test': 'data','more': 'json'}"}] ) assert span["data"][SPANDATA.AI_STREAMING] is True - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.completion_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 diff --git a/tests/integrations/cohere/test_cohere.py b/tests/integrations/cohere/test_cohere.py index b3611925c3..5f2119876c 100644 --- a/tests/integrations/cohere/test_cohere.py +++ b/tests/integrations/cohere/test_cohere.py @@ -65,9 +65,9 @@ def test_nonstreaming_chat( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.completion_tokens.used"] == 10 - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 # noinspection PyTypeChecker @@ -137,9 +137,9 @@ def test_streaming_chat(sentry_init, capture_events, send_default_pii, include_p assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.completion_tokens.used"] == 10 - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 def test_bad_chat(sentry_init, capture_events): @@ -201,8 +201,8 @@ def test_embed(sentry_init, capture_events, send_default_pii, include_prompts): else: assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 10 - assert span["data"]["ai.total_tokens.used"] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 10 def test_span_origin_chat(sentry_init, capture_events): diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index ee5a9d61c6..e81a3f16e5 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -75,7 +75,7 @@ def test_nonstreaming_chat_completion( assert SPANDATA.AI_RESPONSES not in span["data"] if details_arg: - assert span["data"]["ai.total_tokens.used"] == 10 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 10 @pytest.mark.parametrize( @@ -134,7 +134,7 @@ def test_streaming_chat_completion( assert SPANDATA.AI_RESPONSES not in span["data"] if details_arg: - assert span["data"]["ai.total_tokens.used"] == 10 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 10 def test_bad_chat_completion(sentry_init, capture_events): diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 1a2f3469ed..4e06af9c75 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -181,13 +181,13 @@ def test_langchain_agent( assert len(list(x for x in tx["spans"] if x["op"] == "ai.run.langchain")) > 0 if use_unknown_llm_type: - assert "ai.prompt_tokens.used" in chat_spans[0]["data"] - assert "ai.total_tokens.used" in chat_spans[0]["data"] + assert SPANDATA.AI_PROMPT_TOKENS_USED in chat_spans[0]["data"] + assert SPANDATA.AI_TOTAL_TOKENS_USED in chat_spans[0]["data"] else: # important: to avoid double counting, we do *not* measure # tokens used if we have an explicit integration (e.g. OpenAI) - assert "ai.prompt_tokens.used" not in chat_spans[0]["data"] - assert "ai.total_tokens.used" not in chat_spans[0]["data"] + assert SPANDATA.AI_PROMPT_TOKENS_USED not in chat_spans[0]["data"] + assert SPANDATA.AI_TOTAL_TOKENS_USED not in chat_spans[0]["data"] if send_default_pii and include_prompts: assert ( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index b84da43ce2..8baeddb1c2 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -90,9 +90,9 @@ def test_nonstreaming_chat_completion( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.completion_tokens.used"] == 10 - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 @pytest.mark.asyncio @@ -132,9 +132,9 @@ async def test_nonstreaming_chat_completion_async( assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] assert SPANDATA.AI_RESPONSES not in span["data"] - assert span["data"]["ai.completion_tokens.used"] == 10 - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 def tiktoken_encoding_if_installed(): @@ -228,9 +228,9 @@ def test_streaming_chat_completion( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["ai.completion_tokens.used"] == 2 - assert span["data"]["ai.prompt_tokens.used"] == 1 - assert span["data"]["ai.total_tokens.used"] == 3 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 2 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 1 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -324,9 +324,9 @@ async def test_streaming_chat_completion_async( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["ai.completion_tokens.used"] == 2 - assert span["data"]["ai.prompt_tokens.used"] == 1 - assert span["data"]["ai.total_tokens.used"] == 3 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 2 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 1 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -410,8 +410,8 @@ def test_embeddings_create( else: assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 @pytest.mark.asyncio @@ -458,8 +458,8 @@ async def test_embeddings_create_async( else: assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert span["data"]["ai.prompt_tokens.used"] == 20 - assert span["data"]["ai.total_tokens.used"] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 @pytest.mark.forked