
Commit cd5bb03

Lint picking
1 parent 4ef7798 commit cd5bb03

4 files changed: 33 additions & 17 deletions

src/utils/token_counter.py

Lines changed: 10 additions & 3 deletions
@@ -11,6 +11,8 @@
 from typing import Sequence
 
 from cachetools import TTLCache  # type: ignore
+import tiktoken
+
 
 from llama_stack_client.types import (
     UserMessage,
@@ -19,7 +21,6 @@
     CompletionMessage,
 )
 from models.requests import QueryRequest
-import tiktoken
 
 from configuration import configuration, AppConfig
 from constants import DEFAULT_ESTIMATION_TOKENIZER
@@ -127,7 +128,11 @@ def count_turn_tokens(
         }
 
     def count_conversation_turn_tokens(
-        self, conversation_id: str, system_prompt: str, query_request: QueryRequest, response: str = ""
+        self,
+        conversation_id: str,
+        system_prompt: str,
+        query_request: QueryRequest,
+        response: str = "",
     ) -> dict[str, int]:
         """Count tokens for a conversation turn with cumulative tracking.
 
@@ -148,7 +153,9 @@ def count_conversation_turn_tokens(
         - 'output_tokens': Total tokens in the response message
         """
         # Get the current turn's token usage
-        turn_token_usage = self.count_turn_tokens(system_prompt, query_request, response)
+        turn_token_usage = self.count_turn_tokens(
+            system_prompt, query_request, response
+        )
 
         # Get cumulative input tokens for this conversation
         cumulative_input_tokens = _conversation_cache.get(conversation_id, 0)
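
For orientation, the reshaped signature above implies a call pattern like the one below. This is a minimal sketch assuming the imports and constructor argument visible in the test files further down; the conversation id, prompt text, and printed values are illustrative only, and the actual counts depend on the configured tokenizer.

    # Hedged usage sketch (illustrative values; assumes only the API shown in this diff).
    from utils.token_counter import TokenCounter
    from models.requests import QueryRequest

    counter = TokenCounter("llama3.2:1b")
    request = QueryRequest(query="Analyze these files for me")

    # Cumulative tracking: the first call for a new conversation_id starts from zero.
    usage = counter.count_conversation_turn_tokens(
        "conv_example",       # conversation_id (illustrative)
        "System prompt",      # system_prompt
        request,              # query_request
        "Analysis complete",  # response
    )
    print(usage["input_tokens"], usage["output_tokens"])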

tests/unit/app/endpoints/test_query.py

Lines changed: 6 additions & 2 deletions
@@ -760,7 +760,7 @@ def test_retrieve_response_with_mcp_servers_and_mcp_headers(mocker):
         },
     }
 
-    response, conversation_id, token_usage = retrieve_response(
+    response, conversation_id, _ = retrieve_response(
         mock_client,
         model_id,
         query_request,
@@ -1204,7 +1204,11 @@ def test_auth_tuple_unpacking_in_query_endpoint_handler(mocker):
 
     mock_retrieve_response = mocker.patch(
         "app.endpoints.query.retrieve_response",
-        return_value=("test response", "test_conversation_id", {"input_tokens": 10, "output_tokens": 20}),
+        return_value=(
+            "test response",
+            "test_conversation_id",
+            {"input_tokens": 10, "output_tokens": 20},
+        ),
     )
 
     mocker.patch("app.endpoints.query.select_model_id", return_value="test_model")

tests/unit/app/endpoints/test_streaming_query.py

Lines changed: 8 additions & 3 deletions
@@ -151,7 +151,8 @@ async def _test_streaming_query_endpoint_handler(mocker, store_transcript=False)
 
     # Mock the streaming response from LLama Stack
     mock_streaming_response = mocker.AsyncMock()
-    # Currently usage is not returned by the API, we simulate by using del to prevent pytest from returning a Mock
+    # Currently usage is not returned by the API
+    # we simulate by using del to prevent pytest from returning a Mock
     del mock_streaming_response.usage
     mock_streaming_response.__aiter__.return_value = [
         mocker.Mock(
@@ -862,7 +863,7 @@ async def test_retrieve_response_with_mcp_servers_and_mcp_headers(mocker):
         },
     }
 
-    response, conversation_id, token_usage = await retrieve_response(
+    response, conversation_id, _ = await retrieve_response(
         mock_client,
         model_id,
         query_request,
@@ -1224,7 +1225,11 @@ async def test_auth_tuple_unpacking_in_streaming_query_endpoint_handler(mocker):
     mock_streaming_response.__aiter__.return_value = iter([])
     mock_retrieve_response = mocker.patch(
         "app.endpoints.streaming_query.retrieve_response",
-        return_value=(mock_streaming_response, "test_conversation_id", {"input_tokens": 10, "output_tokens": 20}),
+        return_value=(
+            mock_streaming_response,
+            "test_conversation_id",
+            {"input_tokens": 10, "output_tokens": 20},
+        ),
     )
 
     mocker.patch(
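
The comment split in the first hunk refers to a standard unittest.mock behavior the test relies on: deleting an attribute from a Mock (or AsyncMock) marks it as absent, so later access raises AttributeError instead of auto-creating a child Mock. A minimal sketch with illustrative names, not project code:

    # Sketch of the `del` trick the streaming test uses (illustrative only).
    from unittest.mock import Mock

    fake_chunk = Mock()
    del fake_chunk.usage                       # declare the attribute as missing
    assert not hasattr(fake_chunk, "usage")    # access would now raise AttributeError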

tests/unit/utils/test_token_counter.py

Lines changed: 9 additions & 9 deletions
@@ -1,7 +1,8 @@
 """Unit tests for token counter utilities."""
 
-from utils.token_counter import TokenCounter
 from llama_stack_client.types import UserMessage, CompletionMessage
+
+from utils.token_counter import TokenCounter
 from models.requests import QueryRequest, Attachment
 from configuration import AppConfig
 
@@ -31,6 +32,7 @@ class TestTokenCounter:
     """Test cases for TokenCounter class."""
 
     def setup_class(self):
+        """Setup the test class."""
         cfg = AppConfig()
         cfg.init_from_dict(config_dict)
 
@@ -40,6 +42,7 @@ def test_count_tokens_empty_string(self):
         assert counter.count_tokens("") == 0
 
     def test_count_tokens_simple(self):
+        """Test counting tokens for a simple message."""
         counter = TokenCounter("llama3.2:1b")
         assert counter.count_tokens("Hello World!") == 3
 
@@ -104,21 +107,18 @@ def test_count_conversation_turn_tokens_with_attachments(self):
             Attachment(
                 attachment_type="configuration",
                 content_type="application/yaml",
-                content="kind: Pod\nmetadata:\n name: test-pod\nspec:\n containers:\n - name: app",
+                content="kind: Pod\nmetadata:\n name: test-pod\nspec:\n"
+                + " containers:\n - name: app\n image: nginx:latest",
             ),
         ]
 
         query_request = QueryRequest(
-            query="Analyze these files for me",
-            attachments=attachments
+            query="Analyze these files for me", attachments=attachments
         )
 
         # Test the conversation turn with attachments
         result = counter.count_conversation_turn_tokens(
-            "conv_with_attachments",
-            "System prompt",
-            query_request,
-            "Analysis complete"
+            "conv_with_attachments", "System prompt", query_request, "Analysis complete"
         )
 
         # Verify that the result contains the expected structure
@@ -142,7 +142,7 @@ def test_count_conversation_turn_tokens_with_attachments(self):
             "conv_no_attachments",
             "System prompt",
             query_request_no_attachments,
-            "Analysis complete"
+            "Analysis complete",
        )
 
         # The version with attachments should have more input tokens
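
Context for the count_tokens("Hello World!") == 3 assertion: the module imports tiktoken and a DEFAULT_ESTIMATION_TOKENIZER constant, so counts appear to be estimated with a tiktoken encoding. A minimal sketch of that estimation style, assuming the cl100k_base encoding purely for illustration (the project's actual tokenizer constant may differ):

    # Illustrative token estimation with tiktoken; the project's
    # DEFAULT_ESTIMATION_TOKENIZER may name a different encoding.
    import tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")
    tokens = encoding.encode("Hello World!")
    print(len(tokens))  # 3 with this encoding, matching the test's expectation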
