fix: Enable usage metadata in LiteLLM streaming

lizzij · copybara-github · commit f9569bbb1afb · 2025-10-29T20:37:54.000-07:00
Closes #3181
Co-authored-by: Eliza Huang <heliza@google.com>
PiperOrigin-RevId: 825833660
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
@@ -920,6 +920,7 @@ async def generate_content_async(
       # Track function calls by index
       function_calls = {}  # index -> {name, args, id}
       completion_args["stream"] = True
+      completion_args["stream_options"] = {"include_usage": True}
       aggregated_llm_response = None
       aggregated_llm_response_with_tool_call = None
       usage_metadata = None
diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py
@@ -1606,6 +1606,40 @@ async def test_generate_content_async_stream_with_usage_metadata(
   )
 
 
+@pytest.mark.asyncio
+async def test_generate_content_async_stream_with_usage_metadata_only(
+    mock_completion, lite_llm_instance
+):
+  streaming_model_response_with_usage_metadata = [
+      ModelResponse(  # single chunk: usage totals plus an empty "stop" delta
+          usage={
+              "prompt_tokens": 10,
+              "completion_tokens": 5,
+              "total_tokens": 15,
+          },
+          choices=[
+              StreamingChoices(
+                  finish_reason="stop",
+                  delta=Delta(content=""),
+              )
+          ],
+      ),
+  ]
+  mock_completion.return_value = iter(
+      streaming_model_response_with_usage_metadata
+  )
+  # Consume the whole stream; only the call's keyword args are checked below.
+  unused_responses = [
+      response
+      async for response in lite_llm_instance.generate_content_async(
+          LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
+      )
+  ]
+  mock_completion.assert_called_once()
+  _, kwargs = mock_completion.call_args  # keyword args of that single call
+  assert kwargs["stream_options"] == {"include_usage": True}
+
+
 @pytest.mark.asyncio
 async def test_generate_content_async_multiple_function_calls(
     mock_completion, lite_llm_instance