Commit 6dda1ba

LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)
* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722) * feat(new_usage.tsx): add date picker for new usage tab allow user to look back on their usage data * feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details allows usage tracking on how many reasoning tokens are actually being used * feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response allows tracking reasoning_token usage across providers * Fix update team metadata + fix bulk adding models on Ui (#9721) * fix(handle_add_model_submit.tsx): fix bulk adding models * fix(team_info.tsx): fix team metadata update Fixes #9689 * (v0) Unified file id - allow calling multiple providers with same file id (#9718) * feat(files_endpoints.py): initial commit adding 'target_model_names' support allow developer to specify all the models they want to call with the file * feat(files_endpoints.py): return unified files endpoint * test(test_files_endpoints.py): add validation test - if invalid purpose submitted * feat: more updates * feat: initial working commit of unified file id translation * fix: additional fixes * fix(router.py): remove model replace logic in jsonl on acreate_file enables file upload to work for chat completion requests as well * fix(files_endpoints.py): remove whitespace around model name * fix(azure/handler.py): return acreate_file with correct response type * fix: fix linting errors * test: fix mock test to run on github actions * fix: fix ruff errors * fix: fix file too large error * fix(utils.py): remove redundant var * test: modify test to work on github actions * test: update tests * test: more debug logs to understand ci/cd issue * test: fix test for respx * test: skip mock respx test fails on ci/cd - not clear why * fix: fix ruff check * fix: fix test * fix(model_connection_test.tsx): fix linting error * test: update unit tests
1 parent 5a18eeb commit 6dda1ba

27 files changed: +889 −96 lines changed

litellm/files/main.py

Lines changed: 14 additions & 11 deletions
@@ -63,16 +63,17 @@ async def acreate_file(
     loop = asyncio.get_event_loop()
     kwargs["acreate_file"] = True
 
-    # Use a partial function to pass your keyword arguments
-    func = partial(
-        create_file,
-        file,
-        purpose,
-        custom_llm_provider,
-        extra_headers,
-        extra_body,
+    call_args = {
+        "file": file,
+        "purpose": purpose,
+        "custom_llm_provider": custom_llm_provider,
+        "extra_headers": extra_headers,
+        "extra_body": extra_body,
         **kwargs,
-    )
+    }
+
+    # Use a partial function to pass your keyword arguments
+    func = partial(create_file, **call_args)
 
     # Add the context to the function
     ctx = contextvars.copy_context()
@@ -92,7 +93,7 @@ async def acreate_file(
 def create_file(
     file: FileTypes,
     purpose: Literal["assistants", "batch", "fine-tune"],
-    custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai",
+    custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai"]] = None,
     extra_headers: Optional[Dict[str, str]] = None,
     extra_body: Optional[Dict[str, str]] = None,
     **kwargs,
@@ -101,6 +102,8 @@ def create_file(
     Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
 
     LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
+
+    Specify either provider_list or custom_llm_provider.
     """
     try:
         _is_async = kwargs.pop("acreate_file", False) is True
@@ -120,7 +123,7 @@ def create_file(
         if (
             timeout is not None
             and isinstance(timeout, httpx.Timeout)
-            and supports_httpx_timeout(custom_llm_provider) is False
+            and supports_httpx_timeout(cast(str, custom_llm_provider)) is False
         ):
             read_timeout = timeout.read or 600
             timeout = read_timeout  # default 10 min timeout
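
For reference, a minimal usage sketch of the updated entrypoint (not part of the diff; it assumes a local batch_input.jsonl and an OPENAI_API_KEY in the environment). Since custom_llm_provider now defaults to None, passing it explicitly preserves the previous behavior:

import asyncio

import litellm


async def main():
    # custom_llm_provider defaults to None after this change; pass it explicitly
    # to keep the old "openai" behavior.
    file_obj = await litellm.acreate_file(
        file=open("batch_input.jsonl", "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )
    print(file_obj.id)


asyncio.run(main())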

litellm/litellm_core_utils/litellm_logging.py

Lines changed: 5 additions & 1 deletion
@@ -457,8 +457,12 @@ def get_chat_completion_prompt(
         non_default_params: dict,
         prompt_id: str,
         prompt_variables: Optional[dict],
+        prompt_management_logger: Optional[CustomLogger] = None,
     ) -> Tuple[str, List[AllMessageValues], dict]:
-        custom_logger = self.get_custom_logger_for_prompt_management(model)
+        custom_logger = (
+            prompt_management_logger
+            or self.get_custom_logger_for_prompt_management(model)
+        )
         if custom_logger:
             (
                 model,
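
A standalone sketch of the override-or-default behavior this adds (placeholder names, not the Logging class itself): an explicitly passed prompt-management logger wins, otherwise the model-based lookup is used.

from typing import Optional


def lookup_logger_for_model(model: str) -> Optional[str]:
    # stand-in for self.get_custom_logger_for_prompt_management(model)
    return None


def resolve_prompt_management_logger(
    explicit_logger: Optional[str], model: str
) -> Optional[str]:
    # explicit override first, model-based lookup second
    return explicit_logger or lookup_logger_for_model(model)


print(resolve_prompt_management_logger("my-langfuse-logger", "gpt-4o"))  # explicit wins
print(resolve_prompt_management_logger(None, "gpt-4o"))  # falls back to lookup (None here)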

litellm/litellm_core_utils/prompt_templates/common_utils.py

Lines changed: 56 additions & 0 deletions
@@ -7,6 +7,7 @@
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionFileObject,
     ChatCompletionUserMessage,
 )
 from litellm.types.utils import Choices, ModelResponse, StreamingChoices
@@ -292,3 +293,58 @@ def get_completion_messages(
         messages, assistant_continue_message, ensure_alternating_roles
     )
     return messages
+
+
+def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]:
+    """
+    Gets file ids from messages
+    """
+    file_ids = []
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            file_ids.append(file_id)
+    return file_ids
+
+
+def update_messages_with_model_file_ids(
+    messages: List[AllMessageValues],
+    model_id: str,
+    model_file_id_mapping: Dict[str, Dict[str, str]],
+) -> List[AllMessageValues]:
+    """
+    Updates messages with model file ids.
+
+    model_file_id_mapping: Dict[str, Dict[str, str]] = {
+        "litellm_proxy/file_id": {
+            "model_id": "provider_file_id"
+        }
+    }
+    """
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            provider_file_id = (
+                                model_file_id_mapping.get(file_id, {}).get(model_id)
+                                or file_id
+                            )
+                            file_object_file_field["file_id"] = provider_file_id
+    return messages
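
A hedged usage sketch of the two new helpers (the message uses the OpenAI-style "file" content part, and the mapping shape follows the docstring above; all ids here are made up):

from litellm.litellm_core_utils.prompt_templates.common_utils import (
    get_file_ids_from_messages,
    update_messages_with_model_file_ids,
)

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize this document."},
            {"type": "file", "file": {"file_id": "litellm_proxy/file-abc123"}},
        ],
    }
]

# Collect the unified (proxy-level) file ids referenced in the request.
print(get_file_ids_from_messages(messages))  # ['litellm_proxy/file-abc123']

# Swap in the provider-specific file id for the deployment being called.
mapping = {"litellm_proxy/file-abc123": {"model-1": "file-openai-xyz"}}
updated = update_messages_with_model_file_ids(messages, "model-1", mapping)
print(updated[0]["content"][1]["file"]["file_id"])  # 'file-openai-xyz'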

litellm/litellm_core_utils/streaming_chunk_builder_utils.py

Lines changed: 38 additions & 3 deletions
@@ -1,6 +1,6 @@
 import base64
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, cast
 
 from litellm.types.llms.openai import (
     ChatCompletionAssistantContentValue,
@@ -9,7 +9,9 @@
 from litellm.types.utils import (
     ChatCompletionAudioResponse,
     ChatCompletionMessageToolCall,
+    Choices,
     CompletionTokensDetails,
+    CompletionTokensDetailsWrapper,
     Function,
     FunctionCall,
     ModelResponse,
@@ -203,14 +205,14 @@ def get_combined_function_call_content(
         )
 
     def get_combined_content(
-        self, chunks: List[Dict[str, Any]]
+        self, chunks: List[Dict[str, Any]], delta_key: str = "content"
     ) -> ChatCompletionAssistantContentValue:
         content_list: List[str] = []
         for chunk in chunks:
             choices = chunk["choices"]
             for choice in choices:
                 delta = choice.get("delta", {})
-                content = delta.get("content", "")
+                content = delta.get(delta_key, "")
                 if content is None:
                     continue  # openai v1.0.0 sets content = None for chunks
                 content_list.append(content)
@@ -221,6 +223,11 @@ def get_combined_content(
         # Update the "content" field within the response dictionary
         return combined_content
 
+    def get_combined_reasoning_content(
+        self, chunks: List[Dict[str, Any]]
+    ) -> ChatCompletionAssistantContentValue:
+        return self.get_combined_content(chunks, delta_key="reasoning_content")
+
     def get_combined_audio_content(
         self, chunks: List[Dict[str, Any]]
     ) -> ChatCompletionAudioResponse:
@@ -296,12 +303,27 @@ def _usage_chunk_calculation_helper(self, usage_chunk: Usage) -> dict:
             "prompt_tokens_details": prompt_tokens_details,
         }
 
+    def count_reasoning_tokens(self, response: ModelResponse) -> int:
+        reasoning_tokens = 0
+        for choice in response.choices:
+            if (
+                hasattr(cast(Choices, choice).message, "reasoning_content")
+                and cast(Choices, choice).message.reasoning_content is not None
+            ):
+                reasoning_tokens += token_counter(
+                    text=cast(Choices, choice).message.reasoning_content,
+                    count_response_tokens=True,
+                )
+
+        return reasoning_tokens
+
     def calculate_usage(
         self,
         chunks: List[Union[Dict[str, Any], ModelResponse]],
         model: str,
         completion_output: str,
         messages: Optional[List] = None,
+        reasoning_tokens: Optional[int] = None,
     ) -> Usage:
         """
         Calculate usage for the given chunks.
@@ -382,6 +404,19 @@ def calculate_usage(
             )  # for anthropic
         if completion_tokens_details is not None:
             returned_usage.completion_tokens_details = completion_tokens_details
+
+        if reasoning_tokens is not None:
+            if returned_usage.completion_tokens_details is None:
+                returned_usage.completion_tokens_details = (
+                    CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens)
+                )
+            elif (
+                returned_usage.completion_tokens_details is not None
+                and returned_usage.completion_tokens_details.reasoning_tokens is None
+            ):
+                returned_usage.completion_tokens_details.reasoning_tokens = (
+                    reasoning_tokens
+                )
         if prompt_tokens_details is not None:
             returned_usage.prompt_tokens_details = prompt_tokens_details
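
A standalone sketch of the merge rule added in calculate_usage (the helper name is made up; this is not the ChunkProcessor itself): an explicit reasoning_tokens count only fills the field when the provider has not already reported it.

from litellm.types.utils import CompletionTokensDetailsWrapper, Usage


def attach_reasoning_tokens(usage: Usage, reasoning_tokens: int) -> Usage:
    if usage.completion_tokens_details is None:
        usage.completion_tokens_details = CompletionTokensDetailsWrapper(
            reasoning_tokens=reasoning_tokens
        )
    elif usage.completion_tokens_details.reasoning_tokens is None:
        # provider did not report reasoning tokens; use the counted value
        usage.completion_tokens_details.reasoning_tokens = reasoning_tokens
    return usage


usage = Usage(prompt_tokens=12, completion_tokens=40, total_tokens=52)
print(attach_reasoning_tokens(usage, 25).completion_tokens_details.reasoning_tokens)  # 25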

litellm/llms/anthropic/chat/handler.py

Lines changed: 4 additions & 7 deletions
@@ -21,7 +21,6 @@
     get_async_httpx_client,
 )
 from litellm.types.llms.anthropic import (
-    AnthropicChatCompletionUsageBlock,
     ContentBlockDelta,
     ContentBlockStart,
     ContentBlockStop,
@@ -32,13 +31,13 @@
 from litellm.types.llms.openai import (
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
-    ChatCompletionUsageBlock,
 )
 from litellm.types.utils import (
     Delta,
     GenericStreamingChunk,
     ModelResponseStream,
     StreamingChoices,
+    Usage,
 )
 from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
 
@@ -487,10 +486,8 @@ def check_empty_tool_call_args(self) -> bool:
                 return True
         return False
 
-    def _handle_usage(
-        self, anthropic_usage_chunk: Union[dict, UsageDelta]
-    ) -> AnthropicChatCompletionUsageBlock:
-        usage_block = AnthropicChatCompletionUsageBlock(
+    def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage:
+        usage_block = Usage(
             prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0),
             completion_tokens=anthropic_usage_chunk.get("output_tokens", 0),
             total_tokens=anthropic_usage_chunk.get("input_tokens", 0)
@@ -581,7 +578,7 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         finish_reason = ""
-        usage: Optional[ChatCompletionUsageBlock] = None
+        usage: Optional[Usage] = None
         provider_specific_fields: Dict[str, Any] = {}
         reasoning_content: Optional[str] = None
         thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
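
A rough illustration of the new return type (the chunk values are made up): the Anthropic usage block is now mapped onto litellm's generic Usage object instead of the provider-specific AnthropicChatCompletionUsageBlock.

from litellm.types.utils import Usage

anthropic_usage_chunk = {"input_tokens": 10, "output_tokens": 25}

usage = Usage(
    prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0),
    completion_tokens=anthropic_usage_chunk.get("output_tokens", 0),
    total_tokens=anthropic_usage_chunk.get("input_tokens", 0)
    + anthropic_usage_chunk.get("output_tokens", 0),
)
print(usage.total_tokens)  # 35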

litellm/llms/anthropic/chat/transformation.py

Lines changed: 18 additions & 1 deletion
@@ -33,9 +33,16 @@
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolParam,
 )
+from litellm.types.utils import CompletionTokensDetailsWrapper
 from litellm.types.utils import Message as LitellmMessage
 from litellm.types.utils import PromptTokensDetailsWrapper
-from litellm.utils import ModelResponse, Usage, add_dummy_tool, has_tool_call_blocks
+from litellm.utils import (
+    ModelResponse,
+    Usage,
+    add_dummy_tool,
+    has_tool_call_blocks,
+    token_counter,
+)
 
 from ..common_utils import AnthropicError, process_anthropic_headers
 
@@ -772,6 +779,15 @@ def transform_response(
         prompt_tokens_details = PromptTokensDetailsWrapper(
             cached_tokens=cache_read_input_tokens
         )
+        completion_token_details = (
+            CompletionTokensDetailsWrapper(
+                reasoning_tokens=token_counter(
+                    text=reasoning_content, count_response_tokens=True
+                )
+            )
+            if reasoning_content
+            else None
+        )
         total_tokens = prompt_tokens + completion_tokens
         usage = Usage(
             prompt_tokens=prompt_tokens,
@@ -780,6 +796,7 @@
             prompt_tokens_details=prompt_tokens_details,
             cache_creation_input_tokens=cache_creation_input_tokens,
             cache_read_input_tokens=cache_read_input_tokens,
+            completion_tokens_details=completion_token_details,
         )
 
         setattr(model_response, "usage", usage)  # type: ignore
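
A minimal sketch of the reasoning-token accounting added here (the reasoning text is a placeholder; token_counter and CompletionTokensDetailsWrapper are imported as in the diff):

from litellm.types.utils import CompletionTokensDetailsWrapper
from litellm.utils import token_counter

reasoning_content = "First compare the two options, then pick the cheaper one."

completion_token_details = (
    CompletionTokensDetailsWrapper(
        reasoning_tokens=token_counter(
            text=reasoning_content, count_response_tokens=True
        )
    )
    if reasoning_content
    else None
)
print(completion_token_details)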

litellm/llms/azure/files/handler.py

Lines changed: 3 additions & 3 deletions
@@ -28,11 +28,11 @@ async def acreate_file(
         self,
         create_file_data: CreateFileRequest,
         openai_client: AsyncAzureOpenAI,
-    ) -> FileObject:
+    ) -> OpenAIFileObject:
         verbose_logger.debug("create_file_data=%s", create_file_data)
         response = await openai_client.files.create(**create_file_data)
         verbose_logger.debug("create_file_response=%s", response)
-        return response
+        return OpenAIFileObject(**response.model_dump())
 
     def create_file(
         self,
@@ -66,7 +66,7 @@ def create_file(
                 raise ValueError(
                     "AzureOpenAI client is not an instance of AsyncAzureOpenAI. Make sure you passed an AsyncAzureOpenAI client."
                 )
-            return self.acreate_file(  # type: ignore
+            return self.acreate_file(
                 create_file_data=create_file_data, openai_client=openai_client
            )
         response = cast(AzureOpenAI, openai_client).files.create(**create_file_data)
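
A generic illustration of the re-wrap pattern used above (placeholder pydantic models, not litellm's actual classes): the provider SDK response is dumped to a dict and rebuilt as the unified response type, so async Azure file creation returns the same shape as the OpenAI path.

from pydantic import BaseModel


class ProviderFileObject(BaseModel):
    # stands in for the Azure SDK's file response
    id: str
    filename: str
    purpose: str


class UnifiedFileObject(BaseModel):
    # stands in for OpenAIFileObject
    id: str
    filename: str
    purpose: str


provider_response = ProviderFileObject(
    id="file-abc123", filename="batch_input.jsonl", purpose="batch"
)
unified = UnifiedFileObject(**provider_response.model_dump())
print(unified)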
