Commit 6dda1ba

LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)
* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722) * feat(new_usage.tsx): add date picker for new usage tab allow user to look back on their usage data * feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details allows usage tracking on how many reasoning tokens are actually being used * feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response allows tracking reasoning_token usage across providers * Fix update team metadata + fix bulk adding models on Ui (#9721) * fix(handle_add_model_submit.tsx): fix bulk adding models * fix(team_info.tsx): fix team metadata update Fixes #9689 * (v0) Unified file id - allow calling multiple providers with same file id (#9718) * feat(files_endpoints.py): initial commit adding 'target_model_names' support allow developer to specify all the models they want to call with the file * feat(files_endpoints.py): return unified files endpoint * test(test_files_endpoints.py): add validation test - if invalid purpose submitted * feat: more updates * feat: initial working commit of unified file id translation * fix: additional fixes * fix(router.py): remove model replace logic in jsonl on acreate_file enables file upload to work for chat completion requests as well * fix(files_endpoints.py): remove whitespace around model name * fix(azure/handler.py): return acreate_file with correct response type * fix: fix linting errors * test: fix mock test to run on github actions * fix: fix ruff errors * fix: fix file too large error * fix(utils.py): remove redundant var * test: modify test to work on github actions * test: update tests * test: more debug logs to understand ci/cd issue * test: fix test for respx * test: skip mock respx test fails on ci/cd - not clear why * fix: fix ruff check * fix: fix test * fix(model_connection_test.tsx): fix linting error * test: update unit tests
1 parent 5a18eeb commit 6dda1ba

27 files changed: +889 −96 lines changed

litellm/files/main.py

Lines changed: 14 additions & 11 deletions
@@ -63,16 +63,17 @@ async def acreate_file(
     loop = asyncio.get_event_loop()
     kwargs["acreate_file"] = True
 
-    # Use a partial function to pass your keyword arguments
-    func = partial(
-        create_file,
-        file,
-        purpose,
-        custom_llm_provider,
-        extra_headers,
-        extra_body,
+    call_args = {
+        "file": file,
+        "purpose": purpose,
+        "custom_llm_provider": custom_llm_provider,
+        "extra_headers": extra_headers,
+        "extra_body": extra_body,
         **kwargs,
-    )
+    }
+
+    # Use a partial function to pass your keyword arguments
+    func = partial(create_file, **call_args)
 
     # Add the context to the function
     ctx = contextvars.copy_context()
@@ -92,7 +93,7 @@ async def acreate_file(
 def create_file(
     file: FileTypes,
     purpose: Literal["assistants", "batch", "fine-tune"],
-    custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai",
+    custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai"]] = None,
     extra_headers: Optional[Dict[str, str]] = None,
     extra_body: Optional[Dict[str, str]] = None,
     **kwargs,
@@ -101,6 +102,8 @@ def create_file(
     Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
 
     LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
+
+    Specify either provider_list or custom_llm_provider.
     """
     try:
         _is_async = kwargs.pop("acreate_file", False) is True
@@ -120,7 +123,7 @@ def create_file(
         if (
             timeout is not None
             and isinstance(timeout, httpx.Timeout)
-            and supports_httpx_timeout(custom_llm_provider) is False
+            and supports_httpx_timeout(cast(str, custom_llm_provider)) is False
         ):
             read_timeout = timeout.read or 600
             timeout = read_timeout  # default 10 min timeout
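
For reference, a minimal usage sketch of the updated entrypoint (not part of the diff; it assumes a local batch_input.jsonl and an OPENAI_API_KEY in the environment). Since custom_llm_provider now defaults to None, passing it explicitly preserves the previous behavior:

import asyncio

import litellm


async def main():
    # custom_llm_provider defaults to None after this change; pass it explicitly
    # to keep the old "openai" behavior.
    file_obj = await litellm.acreate_file(
        file=open("batch_input.jsonl", "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )
    print(file_obj.id)


asyncio.run(main())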

litellm/litellm_core_utils/litellm_logging.py

Lines changed: 5 additions & 1 deletion
@@ -457,8 +457,12 @@ def get_chat_completion_prompt(
         non_default_params: dict,
         prompt_id: str,
         prompt_variables: Optional[dict],
+        prompt_management_logger: Optional[CustomLogger] = None,
     ) -> Tuple[str, List[AllMessageValues], dict]:
-        custom_logger = self.get_custom_logger_for_prompt_management(model)
+        custom_logger = (
+            prompt_management_logger
+            or self.get_custom_logger_for_prompt_management(model)
+        )
         if custom_logger:
             (
                 model,
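
A standalone sketch of the override-or-default behavior this adds (placeholder names, not the Logging class itself): an explicitly passed prompt-management logger wins, otherwise the model-based lookup is used.

from typing import Optional


def lookup_logger_for_model(model: str) -> Optional[str]:
    # stand-in for self.get_custom_logger_for_prompt_management(model)
    return None


def resolve_prompt_management_logger(
    explicit_logger: Optional[str], model: str
) -> Optional[str]:
    # explicit override first, model-based lookup second
    return explicit_logger or lookup_logger_for_model(model)


print(resolve_prompt_management_logger("my-langfuse-logger", "gpt-4o"))  # explicit wins
print(resolve_prompt_management_logger(None, "gpt-4o"))  # falls back to lookup (None here)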

litellm/litellm_core_utils/prompt_templates/common_utils.py

Lines changed: 56 additions & 0 deletions
@@ -7,6 +7,7 @@
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionFileObject,
     ChatCompletionUserMessage,
 )
 from litellm.types.utils import Choices, ModelResponse, StreamingChoices
@@ -292,3 +293,58 @@ def get_completion_messages(
         messages, assistant_continue_message, ensure_alternating_roles
     )
     return messages
+
+
+def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]:
+    """
+    Gets file ids from messages
+    """
+    file_ids = []
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            file_ids.append(file_id)
+    return file_ids
+
+
+def update_messages_with_model_file_ids(
+    messages: List[AllMessageValues],
+    model_id: str,
+    model_file_id_mapping: Dict[str, Dict[str, str]],
+) -> List[AllMessageValues]:
+    """
+    Updates messages with model file ids.
+
+    model_file_id_mapping: Dict[str, Dict[str, str]] = {
+        "litellm_proxy/file_id": {
+            "model_id": "provider_file_id"
+        }
+    }
+    """
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            provider_file_id = (
+                                model_file_id_mapping.get(file_id, {}).get(model_id)
+                                or file_id
+                            )
+                            file_object_file_field["file_id"] = provider_file_id
+    return messages
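
A hedged usage sketch of the two new helpers (the message uses the OpenAI-style "file" content part, and the mapping shape follows the docstring above; all ids here are made up):

from litellm.litellm_core_utils.prompt_templates.common_utils import (
    get_file_ids_from_messages,
    update_messages_with_model_file_ids,
)

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize this document."},
            {"type": "file", "file": {"file_id": "litellm_proxy/file-abc123"}},
        ],
    }
]

# Collect the unified (proxy-level) file ids referenced in the request.
print(get_file_ids_from_messages(messages))  # ['litellm_proxy/file-abc123']

# Swap in the provider-specific file id for the deployment being called.
mapping = {"litellm_proxy/file-abc123": {"model-1": "file-openai-xyz"}}
updated = update_messages_with_model_file_ids(messages, "model-1", mapping)
print(updated[0]["content"][1]["file"]["file_id"])  # 'file-openai-xyz'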

litellm/litellm_core_utils/streaming_chunk_builder_utils.py

Lines changed: 38 additions & 3 deletions
@@ -1,6 +1,6 @@
 import base64
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, cast
 
 from litellm.types.llms.openai import (
     ChatCompletionAssistantContentValue,
@@ -9,7 +9,9 @@
 from litellm.types.utils import (
     ChatCompletionAudioResponse,
     ChatCompletionMessageToolCall,
+    Choices,
     CompletionTokensDetails,
+    CompletionTokensDetailsWrapper,
     Function,
     FunctionCall,
     ModelResponse,
@@ -203,14 +205,14 @@ def get_combined_function_call_content(
         )
 
     def get_combined_content(
-        self, chunks: List[Dict[str, Any]]
+        self, chunks: List[Dict[str, Any]], delta_key: str = "content"
     ) -> ChatCompletionAssistantContentValue:
         content_list: List[str] = []
         for chunk in chunks:
             choices = chunk["choices"]
             for choice in choices:
                 delta = choice.get("delta", {})
-                content = delta.get("content", "")
+                content = delta.get(delta_key, "")
                 if content is None:
                     continue  # openai v1.0.0 sets content = None for chunks
                 content_list.append(content)
@@ -221,6 +223,11 @@ def get_combined_content(
         # Update the "content" field within the response dictionary
         return combined_content
 
+    def get_combined_reasoning_content(
+        self, chunks: List[Dict[str, Any]]
+    ) -> ChatCompletionAssistantContentValue:
+        return self.get_combined_content(chunks, delta_key="reasoning_content")
+
     def get_combined_audio_content(
         self, chunks: List[Dict[str, Any]]
     ) -> ChatCompletionAudioResponse:
@@ -296,12 +303,27 @@ def _usage_chunk_calculation_helper(self, usage_chunk: Usage) -> dict:
             "prompt_tokens_details": prompt_tokens_details,
         }
 
+    def count_reasoning_tokens(self, response: ModelResponse) -> int:
+        reasoning_tokens = 0
+        for choice in response.choices:
+            if (
+                hasattr(cast(Choices, choice).message, "reasoning_content")
+                and cast(Choices, choice).message.reasoning_content is not None
+            ):
+                reasoning_tokens += token_counter(
+                    text=cast(Choices, choice).message.reasoning_content,
+                    count_response_tokens=True,
+                )
+
+        return reasoning_tokens
+
     def calculate_usage(
         self,
         chunks: List[Union[Dict[str, Any], ModelResponse]],
         model: str,
         completion_output: str,
         messages: Optional[List] = None,
+        reasoning_tokens: Optional[int] = None,
     ) -> Usage:
         """
         Calculate usage for the given chunks.
@@ -382,6 +404,19 @@ def calculate_usage(
             )  # for anthropic
         if completion_tokens_details is not None:
             returned_usage.completion_tokens_details = completion_tokens_details
+
+        if reasoning_tokens is not None:
+            if returned_usage.completion_tokens_details is None:
+                returned_usage.completion_tokens_details = (
+                    CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens)
+                )
+            elif (
+                returned_usage.completion_tokens_details is not None
+                and returned_usage.completion_tokens_details.reasoning_tokens is None
+            ):
+                returned_usage.completion_tokens_details.reasoning_tokens = (
+                    reasoning_tokens
+                )
         if prompt_tokens_details is not None:
             returned_usage.prompt_tokens_details = prompt_tokens_details
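
A standalone sketch of the merge rule added in calculate_usage (the helper name is made up; this is not the ChunkProcessor itself): an explicit reasoning_tokens count only fills the field when the provider has not already reported it.

from litellm.types.utils import CompletionTokensDetailsWrapper, Usage


def attach_reasoning_tokens(usage: Usage, reasoning_tokens: int) -> Usage:
    if usage.completion_tokens_details is None:
        usage.completion_tokens_details = CompletionTokensDetailsWrapper(
            reasoning_tokens=reasoning_tokens
        )
    elif usage.completion_tokens_details.reasoning_tokens is None:
        # provider did not report reasoning tokens; use the counted value
        usage.completion_tokens_details.reasoning_tokens = reasoning_tokens
    return usage


usage = Usage(prompt_tokens=12, completion_tokens=40, total_tokens=52)
print(attach_reasoning_tokens(usage, 25).completion_tokens_details.reasoning_tokens)  # 25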

litellm/llms/anthropic/chat/handler.py

Lines changed: 4 additions & 7 deletions
@@ -21,7 +21,6 @@
     get_async_httpx_client,
 )
 from litellm.types.llms.anthropic import (
-    AnthropicChatCompletionUsageBlock,
     ContentBlockDelta,
     ContentBlockStart,
     ContentBlockStop,
@@ -32,13 +31,13 @@
 from litellm.types.llms.openai import (
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
-    ChatCompletionUsageBlock,
 )
 from litellm.types.utils import (
     Delta,
     GenericStreamingChunk,
     ModelResponseStream,
     StreamingChoices,
+    Usage,
 )
 from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
 
@@ -487,10 +486,8 @@ def check_empty_tool_call_args(self) -> bool:
                 return True
         return False
 
-    def _handle_usage(
-        self, anthropic_usage_chunk: Union[dict, UsageDelta]
-    ) -> AnthropicChatCompletionUsageBlock:
-        usage_block = AnthropicChatCompletionUsageBlock(
+    def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage:
+        usage_block = Usage(
             prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0),
             completion_tokens=anthropic_usage_chunk.get("output_tokens", 0),
             total_tokens=anthropic_usage_chunk.get("input_tokens", 0)
@@ -581,7 +578,7 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         finish_reason = ""
-        usage: Optional[ChatCompletionUsageBlock] = None
+        usage: Optional[Usage] = None
         provider_specific_fields: Dict[str, Any] = {}
         reasoning_content: Optional[str] = None
         thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
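
A rough illustration of the new return type (the chunk values are made up): the Anthropic usage block is now mapped onto litellm's generic Usage object instead of the provider-specific AnthropicChatCompletionUsageBlock.

from litellm.types.utils import Usage

anthropic_usage_chunk = {"input_tokens": 10, "output_tokens": 25}

usage = Usage(
    prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0),
    completion_tokens=anthropic_usage_chunk.get("output_tokens", 0),
    total_tokens=anthropic_usage_chunk.get("input_tokens", 0)
    + anthropic_usage_chunk.get("output_tokens", 0),
)
print(usage.total_tokens)  # 35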

litellm/llms/anthropic/chat/transformation.py

Lines changed: 18 additions & 1 deletion
@@ -33,9 +33,16 @@
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolParam,
 )
+from litellm.types.utils import CompletionTokensDetailsWrapper
 from litellm.types.utils import Message as LitellmMessage
 from litellm.types.utils import PromptTokensDetailsWrapper
-from litellm.utils import ModelResponse, Usage, add_dummy_tool, has_tool_call_blocks
+from litellm.utils import (
+    ModelResponse,
+    Usage,
+    add_dummy_tool,
+    has_tool_call_blocks,
+    token_counter,
+)
 
 from ..common_utils import AnthropicError, process_anthropic_headers
 
@@ -772,6 +779,15 @@ def transform_response(
         prompt_tokens_details = PromptTokensDetailsWrapper(
             cached_tokens=cache_read_input_tokens
         )
+        completion_token_details = (
+            CompletionTokensDetailsWrapper(
+                reasoning_tokens=token_counter(
+                    text=reasoning_content, count_response_tokens=True
+                )
+            )
+            if reasoning_content
+            else None
+        )
         total_tokens = prompt_tokens + completion_tokens
         usage = Usage(
             prompt_tokens=prompt_tokens,
@@ -780,6 +796,7 @@
             prompt_tokens_details=prompt_tokens_details,
             cache_creation_input_tokens=cache_creation_input_tokens,
             cache_read_input_tokens=cache_read_input_tokens,
+            completion_tokens_details=completion_token_details,
         )
 
         setattr(model_response, "usage", usage)  # type: ignore
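
A minimal sketch of the reasoning-token accounting added here (the reasoning text is a placeholder; token_counter and CompletionTokensDetailsWrapper are imported as in the diff):

from litellm.types.utils import CompletionTokensDetailsWrapper
from litellm.utils import token_counter

reasoning_content = "First compare the two options, then pick the cheaper one."

completion_token_details = (
    CompletionTokensDetailsWrapper(
        reasoning_tokens=token_counter(
            text=reasoning_content, count_response_tokens=True
        )
    )
    if reasoning_content
    else None
)
print(completion_token_details)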

litellm/llms/azure/files/handler.py

Lines changed: 3 additions & 3 deletions
@@ -28,11 +28,11 @@ async def acreate_file(
         self,
         create_file_data: CreateFileRequest,
         openai_client: AsyncAzureOpenAI,
-    ) -> FileObject:
+    ) -> OpenAIFileObject:
         verbose_logger.debug("create_file_data=%s", create_file_data)
         response = await openai_client.files.create(**create_file_data)
         verbose_logger.debug("create_file_response=%s", response)
-        return response
+        return OpenAIFileObject(**response.model_dump())
 
     def create_file(
         self,
@@ -66,7 +66,7 @@ def create_file(
                 raise ValueError(
                     "AzureOpenAI client is not an instance of AsyncAzureOpenAI. Make sure you passed an AsyncAzureOpenAI client."
                 )
-            return self.acreate_file(  # type: ignore
+            return self.acreate_file(
                 create_file_data=create_file_data, openai_client=openai_client
            )
         response = cast(AzureOpenAI, openai_client).files.create(**create_file_data)
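
A generic illustration of the re-wrap pattern used above (placeholder pydantic models, not litellm's actual classes): the provider SDK response is dumped to a dict and rebuilt as the unified response type, so async Azure file creation returns the same shape as the OpenAI path.

from pydantic import BaseModel


class ProviderFileObject(BaseModel):
    # stands in for the Azure SDK's file response
    id: str
    filename: str
    purpose: str


class UnifiedFileObject(BaseModel):
    # stands in for OpenAIFileObject
    id: str
    filename: str
    purpose: str


provider_response = ProviderFileObject(
    id="file-abc123", filename="batch_input.jsonl", purpose="batch"
)
unified = UnifiedFileObject(**provider_response.model_dump())
print(unified)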
