From b0e32562aff9aceafec994d3b047f7c2a9f11524 Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Tue, 10 Sep 2024 16:46:51 +0100 Subject: [PATCH 1/4] feat(errors): include completion in LengthFinishReasonError (#1701) --- src/openai/_exceptions.py | 23 +++++++++++++++---- src/openai/lib/_parsing/_completions.py | 2 +- src/openai/lib/streaming/chat/_completions.py | 4 +++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py index f44f90b52f..e326ed9578 100644 --- a/src/openai/_exceptions.py +++ b/src/openai/_exceptions.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, Optional, cast +from typing import TYPE_CHECKING, Any, Optional, cast from typing_extensions import Literal import httpx @@ -10,6 +10,9 @@ from ._utils import is_dict from ._models import construct_type +if TYPE_CHECKING: + from .types.chat import ChatCompletion + __all__ = [ "BadRequestError", "AuthenticationError", @@ -130,10 +133,20 @@ class InternalServerError(APIStatusError): class LengthFinishReasonError(OpenAIError): - def __init__(self) -> None: - super().__init__( - f"Could not parse response content as the length limit was reached", - ) + completion: ChatCompletion + """The completion that caused this error. + + Note: this will *not* be a complete `ChatCompletion` object when streaming as `usage` + will not be included. + """ + + def __init__(self, *, completion: ChatCompletion) -> None: + msg = "Could not parse response content as the length limit was reached" + if completion.usage: + msg += f" - {completion.usage}" + + super().__init__(msg) + self.completion = completion class ContentFilterFinishReasonError(OpenAIError): diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index 2ef1bf3553..f1fa9f2b55 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -69,7 +69,7 @@ def parse_chat_completion( choices: list[ParsedChoice[ResponseFormatT]] = [] for choice in chat_completion.choices: if choice.finish_reason == "length": - raise LengthFinishReasonError() + raise LengthFinishReasonError(completion=chat_completion) if choice.finish_reason == "content_filter": raise ContentFilterFinishReasonError() diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index 342a5e2b95..a4b0f856f7 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -394,7 +394,9 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS if has_parseable_input(response_format=self._response_format, input_tools=self._input_tools): if choice.finish_reason == "length": - raise LengthFinishReasonError() + # at the time of writing, `.usage` will always be `None` but + # we include it here in case that is changed in the future + raise LengthFinishReasonError(completion=completion_snapshot) if choice.finish_reason == "content_filter": raise ContentFilterFinishReasonError() From 80f02f9e5f83fac9cd2f4172b733a92ad01399b2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:11:58 +0000 Subject: [PATCH 2/4] fix(types): correctly mark stream discriminator as optional (#1706) --- src/openai/types/beta/thread_create_and_run_params.py | 2 +- src/openai/types/beta/threads/run_create_params.py | 2 +- src/openai/types/beta/threads/run_submit_tool_outputs_params.py | 2 +- 
src/openai/types/chat/completion_create_params.py | 2 +- src/openai/types/completion_create_params.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index cd3d9f29d4..370c2f9bce 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -332,7 +332,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 8bb73ddc78..7c5f571d58 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -225,7 +225,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class RunCreateParamsNonStreaming(RunCreateParamsBase): +class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index ccb5e5e97e..147728603a 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -31,7 +31,7 @@ class ToolOutput(TypedDict, total=False): """ -class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 91435dcedd..b86dab742b 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -262,7 +262,7 @@ class Function(TypedDict, total=False): ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema] -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """If set, partial message deltas will be sent, like in ChatGPT. diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index 9fe22fe3c9..6c112b3902 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -160,7 +160,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """Whether to stream back partial progress. From 06bd42e77121a6abd4826a79ce1848812d956576 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Thu, 12 Sep 2024 16:54:32 +0000 Subject: [PATCH 3/4] feat(api): add o1 models (#1708) See https://platform.openai.com/docs/guides/reasoning for details. 
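As a quick, unofficial sketch of the new surface area (the prompt, model
choice, and token budget below are placeholders, and the snippet assumes
`OPENAI_API_KEY` is set in the environment):

    from openai import OpenAI

    client = OpenAI()

    completion = client.chat.completions.create(
        model="o1-preview",
        messages=[{"role": "user", "content": "Write a haiku about recursion."}],
        # Caps visible output tokens *plus* reasoning tokens; the deprecated
        # `max_tokens` parameter is not compatible with o1 series models.
        max_completion_tokens=2048,
    )

    print(completion.choices[0].message.content)
    usage = completion.usage
    if usage is not None and usage.completion_tokens_details is not None:
        # `completion_tokens_details` and `reasoning_tokens` are optional
        # and may be absent on responses from older models.
        print("reasoning tokens:", usage.completion_tokens_details.reasoning_tokens)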
--- .stats.yml | 2 +- src/openai/resources/beta/assistants.py | 24 +-- src/openai/resources/beta/chat/completions.py | 8 + .../resources/beta/threads/runs/runs.py | 36 ++-- src/openai/resources/beta/threads/threads.py | 36 ++-- src/openai/resources/chat/completions.py | 172 ++++++++++++------ src/openai/resources/fine_tuning/jobs/jobs.py | 4 +- src/openai/types/beta/assistant.py | 6 +- .../types/beta/assistant_create_params.py | 6 +- .../types/beta/assistant_update_params.py | 6 +- src/openai/types/beta/file_search_tool.py | 15 +- .../types/beta/file_search_tool_param.py | 15 +- .../beta/thread_create_and_run_params.py | 6 +- src/openai/types/beta/threads/run.py | 6 +- .../types/beta/threads/run_create_params.py | 6 +- .../types/chat/completion_create_params.py | 30 ++- src/openai/types/chat_model.py | 7 +- src/openai/types/completion_usage.py | 11 +- .../types/fine_tuning/job_create_params.py | 2 +- tests/api_resources/chat/test_completions.py | 4 + tests/lib/chat/test_completions.py | 8 +- tests/lib/chat/test_completions_streaming.py | 2 +- 22 files changed, 253 insertions(+), 159 deletions(-) diff --git a/.stats.yml b/.stats.yml index 903c159960..de3167f3a8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-85a85e0c08de456441431c0ae4e9c078cc8f9748c29430b9a9058340db6389ee.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-501122aa32adaa2abb3d4487880ab9cdf2141addce2e6c3d1bd9bb6b44c318a8.yml diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index 1e57944eb3..5d8c6ec331 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -100,11 +100,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -250,11 +250,11 @@ def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -486,11 +486,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. 
Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -636,11 +636,11 @@ async def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py index 07eda27b76..ea3526778d 100644 --- a/src/openai/resources/beta/chat/completions.py +++ b/src/openai/resources/beta/chat/completions.py @@ -42,6 +42,7 @@ def parse( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -121,6 +122,7 @@ class MathResponse(BaseModel): functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -157,6 +159,7 @@ def stream( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -216,6 +219,7 @@ def stream( functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -254,6 +258,7 @@ async def parse( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -333,6 +338,7 @@ class MathResponse(BaseModel): functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -369,6 +375,7 @@ def stream( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | 
NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -429,6 +436,7 @@ def stream( functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index 807027a644..3fb1cc77aa 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -167,11 +167,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -311,11 +311,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -451,11 +451,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1529,11 +1529,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1673,11 +1673,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1813,11 +1813,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 031121e5cc..49b0e4b37e 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -335,11 +335,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -469,11 +469,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -599,11 +599,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1169,11 +1169,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1303,11 +1303,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1433,11 +1433,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index ec76bfdf52..e9267b1f03 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -64,6 +64,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -132,13 +133,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -161,11 +166,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -185,8 +190,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -261,6 +269,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -335,13 +344,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -364,11 +377,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -388,8 +401,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -457,6 +473,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -531,13 +548,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -560,11 +581,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -584,8 +605,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -652,6 +676,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -687,6 +712,7 @@ def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, @@ -746,6 +772,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -814,13 +841,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -843,11 +874,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -867,8 +898,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. 
- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. @@ -943,6 +977,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1017,13 +1052,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1046,11 +1085,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1070,8 +1109,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -1139,6 +1181,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1213,13 +1256,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1242,11 +1289,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1266,8 +1313,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -1334,6 +1384,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1369,6 +1420,7 @@ async def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index ca4799e7ac..44abf1cfe1 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -114,7 +114,7 @@ def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like @@ -405,7 +405,7 @@ async def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index c6a0a4cfcf..b4da08745d 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -90,11 +90,11 @@ class Assistant(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index c1360b5b66..eca4da0a2b 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -58,11 +58,11 @@ class AssistantCreateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index ade565819f..5396233937 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -50,11 +50,11 @@ class AssistantUpdateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py index 4015b3da09..aee6593e89 100644 --- a/src/openai/types/beta/file_search_tool.py +++ b/src/openai/types/beta/file_search_tool.py @@ -9,16 +9,16 @@ class FileSearchRankingOptions(BaseModel): - ranker: Optional[Literal["auto", "default_2024_08_21"]] = None - """The ranker to use for the file search. + score_threshold: float + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: Optional[float] = None - """The score threshold for the file search. + ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. """ @@ -38,6 +38,9 @@ class FileSearch(BaseModel): ranking_options: Optional[FileSearchRankingOptions] = None """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py index 97e651b0da..5ce91207ba 100644 --- a/src/openai/types/beta/file_search_tool_param.py +++ b/src/openai/types/beta/file_search_tool_param.py @@ -8,16 +8,16 @@ class FileSearchRankingOptions(TypedDict, total=False): - ranker: Literal["auto", "default_2024_08_21"] - """The ranker to use for the file search. + score_threshold: Required[float] + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: float - """The score threshold for the file search. + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. 
""" @@ -37,6 +37,9 @@ class FileSearch(TypedDict, total=False): ranking_options: FileSearchRankingOptions """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 370c2f9bce..20d525fa1a 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -98,11 +98,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index 0579e229d8..5abc1de295 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -172,11 +172,11 @@ class Run(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 7c5f571d58..824cb1a041 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -111,11 +111,11 @@ class RunCreateParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index b86dab742b..4ed89b00f5 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -87,15 +87,22 @@ class CompletionCreateParamsBase(TypedDict, total=False): `content` of `message`. """ + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] """ The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). """ n: Optional[int] @@ -130,11 +137,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -160,8 +167,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 09bc081f7a..f8438c75c8 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -5,9 +5,14 @@ __all__ = ["ChatModel"] ChatModel: TypeAlias = Literal[ + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", "gpt-4o", - "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-4-turbo", diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index ac09afd479..a4b9116e35 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,9 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" class CompletionUsage(BaseModel): @@ -15,3 +21,6 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index e9be2ef1ca..8f5ea86274 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False): suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. 
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index e9be2ef1ca..8f5ea86274 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False):
 
     suffix: Optional[str]
     """
-    A string of up to 18 characters that will be added to your fine-tuned model
+    A string of up to 64 characters that will be added to your fine-tuned model
     name.
 
     For example, a `suffix` of "custom-model-name" would produce a model name like
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 0b89fbf9cd..c44703a434 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -55,6 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -175,6 +176,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -314,6 +316,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -434,6 +437,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py
index d67d5129cd..e7b9c4f1fd 100644
--- a/tests/lib/chat/test_completions.py
+++ b/tests/lib/chat/test_completions.py
@@ -74,7 +74,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_845eaabc1f',
-        usage=CompletionUsage(completion_tokens=28, prompt_tokens=14, total_tokens=42)
+        usage=CompletionUsage(completion_tokens=28, completion_tokens_details=None, prompt_tokens=14, total_tokens=42)
     )
     """
     )
@@ -129,7 +129,7 @@ class Location(BaseModel):
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
@@ -186,7 +186,7 @@ class Location(BaseModel):
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
@@ -368,7 +368,7 @@ class CalendarEvent:
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=17, prompt_tokens=32, total_tokens=49)
+        usage=CompletionUsage(completion_tokens=17, completion_tokens_details=None, prompt_tokens=32, total_tokens=49)
     )
     """
     )
diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py
index c3dd69ad57..5ad1f084d2 100644
--- a/tests/lib/chat/test_completions_streaming.py
+++ b/tests/lib/chat/test_completions_streaming.py
@@ -155,7 +155,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_845eaabc1f',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
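
The `suffix` limit raised to 64 characters earlier in this patch applies at job
creation time. A sketch of a fine-tuning request using a longer suffix; the file
ID is a placeholder for an already-uploaded training file, and the model name is
illustrative:

    from openai import OpenAI

    client = OpenAI()

    # `suffix` may now be up to 64 characters; it is appended to the
    # resulting fine-tuned model name, e.g. ft:<model>:<org>:<suffix>:<id>.
    job = client.fine_tuning.jobs.create(
        training_file="file-abc123",  # placeholder: ID of an uploaded JSONL file
        model="gpt-4o-mini-2024-07-18",
        suffix="customer-support-assistant-long-name-test",
    )
    print(job.id, job.status)
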
From 37be8a03696ce77758f0f9e5f62ceedd982ebef9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:03:58 +0000
Subject: [PATCH 4/4] release: 1.45.0

---
 .release-please-manifest.json |  2 +-
 CHANGELOG.md                  | 14 ++++++++++++++
 pyproject.toml                |  2 +-
 src/openai/_version.py        |  2 +-
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 1ee5dee6dd..6d2723c72a 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.44.1"
+  ".": "1.45.0"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47fa2d9208..8382675b73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## 1.45.0 (2024-09-12)
+
+Full Changelog: [v1.44.1...v1.45.0](https://github.com/openai/openai-python/compare/v1.44.1...v1.45.0)
+
+### Features
+
+* **api:** add o1 models ([#1708](https://github.com/openai/openai-python/issues/1708)) ([06bd42e](https://github.com/openai/openai-python/commit/06bd42e77121a6abd4826a79ce1848812d956576))
+* **errors:** include completion in LengthFinishReasonError ([#1701](https://github.com/openai/openai-python/issues/1701)) ([b0e3256](https://github.com/openai/openai-python/commit/b0e32562aff9aceafec994d3b047f7c2a9f11524))
+
+
+### Bug Fixes
+
+* **types:** correctly mark stream discriminator as optional ([#1706](https://github.com/openai/openai-python/issues/1706)) ([80f02f9](https://github.com/openai/openai-python/commit/80f02f9e5f83fac9cd2f4172b733a92ad01399b2))
+
 ## 1.44.1 (2024-09-09)
 
 Full Changelog: [v1.44.0...v1.44.1](https://github.com/openai/openai-python/compare/v1.44.0...v1.44.1)
diff --git a/pyproject.toml b/pyproject.toml
index d4b7d2b210..178c3db355 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.44.1"
+version = "1.45.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 39c7f63e1e..0d8ebf418b 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.44.1"  # x-release-please-version
+__version__ = "1.45.0"  # x-release-please-version