From b0e32562aff9aceafec994d3b047f7c2a9f11524 Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Tue, 10 Sep 2024 16:46:51 +0100 Subject: [PATCH 1/4] feat(errors): include completion in LengthFinishReasonError (#1701) --- src/openai/_exceptions.py | 23 +++++++++++++++---- src/openai/lib/_parsing/_completions.py | 2 +- src/openai/lib/streaming/chat/_completions.py | 4 +++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py index f44f90b52f..e326ed9578 100644 --- a/src/openai/_exceptions.py +++ b/src/openai/_exceptions.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, Optional, cast +from typing import TYPE_CHECKING, Any, Optional, cast from typing_extensions import Literal import httpx @@ -10,6 +10,9 @@ from ._utils import is_dict from ._models import construct_type +if TYPE_CHECKING: + from .types.chat import ChatCompletion + __all__ = [ "BadRequestError", "AuthenticationError", @@ -130,10 +133,20 @@ class InternalServerError(APIStatusError): class LengthFinishReasonError(OpenAIError): - def __init__(self) -> None: - super().__init__( - f"Could not parse response content as the length limit was reached", - ) + completion: ChatCompletion + """The completion that caused this error. + + Note: this will *not* be a complete `ChatCompletion` object when streaming as `usage` + will not be included. + """ + + def __init__(self, *, completion: ChatCompletion) -> None: + msg = "Could not parse response content as the length limit was reached" + if completion.usage: + msg += f" - {completion.usage}" + + super().__init__(msg) + self.completion = completion class ContentFilterFinishReasonError(OpenAIError): diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index 2ef1bf3553..f1fa9f2b55 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -69,7 +69,7 @@ def parse_chat_completion( choices: list[ParsedChoice[ResponseFormatT]] = [] for choice in chat_completion.choices: if choice.finish_reason == "length": - raise LengthFinishReasonError() + raise LengthFinishReasonError(completion=chat_completion) if choice.finish_reason == "content_filter": raise ContentFilterFinishReasonError() diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index 342a5e2b95..a4b0f856f7 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -394,7 +394,9 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS if has_parseable_input(response_format=self._response_format, input_tools=self._input_tools): if choice.finish_reason == "length": - raise LengthFinishReasonError() + # at the time of writing, `.usage` will always be `None` but + # we include it here in case that is changed in the future + raise LengthFinishReasonError(completion=completion_snapshot) if choice.finish_reason == "content_filter": raise ContentFilterFinishReasonError() From 80f02f9e5f83fac9cd2f4172b733a92ad01399b2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:11:58 +0000 Subject: [PATCH 2/4] fix(types): correctly mark stream discriminator as optional (#1706) --- src/openai/types/beta/thread_create_and_run_params.py | 2 +- src/openai/types/beta/threads/run_create_params.py | 2 +- src/openai/types/beta/threads/run_submit_tool_outputs_params.py | 2 +- 
src/openai/types/chat/completion_create_params.py | 2 +- src/openai/types/completion_create_params.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index cd3d9f29d4..370c2f9bce 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -332,7 +332,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 8bb73ddc78..7c5f571d58 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -225,7 +225,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class RunCreateParamsNonStreaming(RunCreateParamsBase): +class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index ccb5e5e97e..147728603a 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -31,7 +31,7 @@ class ToolOutput(TypedDict, total=False): """ -class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 91435dcedd..b86dab742b 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -262,7 +262,7 @@ class Function(TypedDict, total=False): ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema] -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """If set, partial message deltas will be sent, like in ChatGPT. diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index 9fe22fe3c9..6c112b3902 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -160,7 +160,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """Whether to stream back partial progress. From 06bd42e77121a6abd4826a79ce1848812d956576 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Thu, 12 Sep 2024 16:54:32 +0000 Subject: [PATCH 3/4] feat(api): add o1 models (#1708) See https://platform.openai.com/docs/guides/reasoning for details. 
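As a quick, unofficial sketch of the new surface area (the prompt, model
choice, and token budget below are placeholders, and the snippet assumes
`OPENAI_API_KEY` is set in the environment):

    from openai import OpenAI

    client = OpenAI()

    completion = client.chat.completions.create(
        model="o1-preview",
        messages=[{"role": "user", "content": "Write a haiku about recursion."}],
        # Caps visible output tokens *plus* reasoning tokens; the deprecated
        # `max_tokens` parameter is not compatible with o1 series models.
        max_completion_tokens=2048,
    )

    print(completion.choices[0].message.content)
    usage = completion.usage
    if usage is not None and usage.completion_tokens_details is not None:
        # `completion_tokens_details` and `reasoning_tokens` are optional
        # and may be absent on responses from older models.
        print("reasoning tokens:", usage.completion_tokens_details.reasoning_tokens)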
--- .stats.yml | 2 +- src/openai/resources/beta/assistants.py | 24 +-- src/openai/resources/beta/chat/completions.py | 8 + .../resources/beta/threads/runs/runs.py | 36 ++-- src/openai/resources/beta/threads/threads.py | 36 ++-- src/openai/resources/chat/completions.py | 172 ++++++++++++------ src/openai/resources/fine_tuning/jobs/jobs.py | 4 +- src/openai/types/beta/assistant.py | 6 +- .../types/beta/assistant_create_params.py | 6 +- .../types/beta/assistant_update_params.py | 6 +- src/openai/types/beta/file_search_tool.py | 15 +- .../types/beta/file_search_tool_param.py | 15 +- .../beta/thread_create_and_run_params.py | 6 +- src/openai/types/beta/threads/run.py | 6 +- .../types/beta/threads/run_create_params.py | 6 +- .../types/chat/completion_create_params.py | 30 ++- src/openai/types/chat_model.py | 7 +- src/openai/types/completion_usage.py | 11 +- .../types/fine_tuning/job_create_params.py | 2 +- tests/api_resources/chat/test_completions.py | 4 + tests/lib/chat/test_completions.py | 8 +- tests/lib/chat/test_completions_streaming.py | 2 +- 22 files changed, 253 insertions(+), 159 deletions(-) diff --git a/.stats.yml b/.stats.yml index 903c159960..de3167f3a8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-85a85e0c08de456441431c0ae4e9c078cc8f9748c29430b9a9058340db6389ee.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-501122aa32adaa2abb3d4487880ab9cdf2141addce2e6c3d1bd9bb6b44c318a8.yml diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index 1e57944eb3..5d8c6ec331 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -100,11 +100,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -250,11 +250,11 @@ def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -486,11 +486,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. 
Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -636,11 +636,11 @@ async def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py index 07eda27b76..ea3526778d 100644 --- a/src/openai/resources/beta/chat/completions.py +++ b/src/openai/resources/beta/chat/completions.py @@ -42,6 +42,7 @@ def parse( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -121,6 +122,7 @@ class MathResponse(BaseModel): functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -157,6 +159,7 @@ def stream( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -216,6 +219,7 @@ def stream( functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -254,6 +258,7 @@ async def parse( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -333,6 +338,7 @@ class MathResponse(BaseModel): functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, @@ -369,6 +375,7 @@ def stream( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | 
NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -429,6 +436,7 @@ def stream( functions=functions, logit_bias=logit_bias, logprobs=logprobs, + max_completion_tokens=max_completion_tokens, max_tokens=max_tokens, n=n, parallel_tool_calls=parallel_tool_calls, diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index 807027a644..3fb1cc77aa 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -167,11 +167,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -311,11 +311,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -451,11 +451,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1529,11 +1529,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1673,11 +1673,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1813,11 +1813,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 031121e5cc..49b0e4b37e 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -335,11 +335,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -469,11 +469,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -599,11 +599,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1169,11 +1169,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1303,11 +1303,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1433,11 +1433,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index ec76bfdf52..e9267b1f03 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -64,6 +64,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -132,13 +133,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -161,11 +166,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -185,8 +190,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -261,6 +269,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -335,13 +344,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -364,11 +377,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -388,8 +401,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -457,6 +473,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -531,13 +548,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -560,11 +581,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -584,8 +605,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -652,6 +676,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -687,6 +712,7 @@ def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, @@ -746,6 +772,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -814,13 +841,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -843,11 +874,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -867,8 +898,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. 
- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. @@ -943,6 +977,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1017,13 +1052,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1046,11 +1085,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1070,8 +1109,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -1139,6 +1181,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1213,13 +1256,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1242,11 +1289,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1266,8 +1313,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
@@ -1334,6 +1384,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1369,6 +1420,7 @@ async def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index ca4799e7ac..44abf1cfe1 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -114,7 +114,7 @@ def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like @@ -405,7 +405,7 @@ async def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index c6a0a4cfcf..b4da08745d 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -90,11 +90,11 @@ class Assistant(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index c1360b5b66..eca4da0a2b 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -58,11 +58,11 @@ class AssistantCreateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index ade565819f..5396233937 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -50,11 +50,11 @@ class AssistantUpdateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py index 4015b3da09..aee6593e89 100644 --- a/src/openai/types/beta/file_search_tool.py +++ b/src/openai/types/beta/file_search_tool.py @@ -9,16 +9,16 @@ class FileSearchRankingOptions(BaseModel): - ranker: Optional[Literal["auto", "default_2024_08_21"]] = None - """The ranker to use for the file search. + score_threshold: float + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: Optional[float] = None - """The score threshold for the file search. + ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. """ @@ -38,6 +38,9 @@ class FileSearch(BaseModel): ranking_options: Optional[FileSearchRankingOptions] = None """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py index 97e651b0da..5ce91207ba 100644 --- a/src/openai/types/beta/file_search_tool_param.py +++ b/src/openai/types/beta/file_search_tool_param.py @@ -8,16 +8,16 @@ class FileSearchRankingOptions(TypedDict, total=False): - ranker: Literal["auto", "default_2024_08_21"] - """The ranker to use for the file search. + score_threshold: Required[float] + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: float - """The score threshold for the file search. + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. 
""" @@ -37,6 +37,9 @@ class FileSearch(TypedDict, total=False): ranking_options: FileSearchRankingOptions """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 370c2f9bce..20d525fa1a 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -98,11 +98,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index 0579e229d8..5abc1de295 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -172,11 +172,11 @@ class Run(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 7c5f571d58..824cb1a041 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -111,11 +111,11 @@ class RunCreateParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index b86dab742b..4ed89b00f5 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -87,15 +87,22 @@ class CompletionCreateParamsBase(TypedDict, total=False): `content` of `message`. """ + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] """ The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). """ n: Optional[int] @@ -130,11 +137,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -160,8 +167,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'. 
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 09bc081f7a..f8438c75c8 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -5,9 +5,14 @@ __all__ = ["ChatModel"] ChatModel: TypeAlias = Literal[ + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", "gpt-4o", - "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-4-turbo", diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index ac09afd479..a4b9116e35 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,9 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" class CompletionUsage(BaseModel): @@ -15,3 +21,6 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index e9be2ef1ca..8f5ea86274 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False): suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. 
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index e9be2ef1ca..8f5ea86274 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False):
 
     suffix: Optional[str]
     """
-    A string of up to 18 characters that will be added to your fine-tuned model
+    A string of up to 64 characters that will be added to your fine-tuned model
     name.
 
     For example, a `suffix` of "custom-model-name" would produce a model name like
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 0b89fbf9cd..c44703a434 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -55,6 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -175,6 +176,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -314,6 +316,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -434,6 +437,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py
index d67d5129cd..e7b9c4f1fd 100644
--- a/tests/lib/chat/test_completions.py
+++ b/tests/lib/chat/test_completions.py
@@ -74,7 +74,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_845eaabc1f',
-        usage=CompletionUsage(completion_tokens=28, prompt_tokens=14, total_tokens=42)
+        usage=CompletionUsage(completion_tokens=28, completion_tokens_details=None, prompt_tokens=14, total_tokens=42)
     )
     """
     )
@@ -129,7 +129,7 @@ class Location(BaseModel):
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
@@ -186,7 +186,7 @@ class Location(BaseModel):
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
@@ -368,7 +368,7 @@ class CalendarEvent:
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_2a322c9ffc',
-        usage=CompletionUsage(completion_tokens=17, prompt_tokens=32, total_tokens=49)
+        usage=CompletionUsage(completion_tokens=17, completion_tokens_details=None, prompt_tokens=32, total_tokens=49)
     )
     """
     )
diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py
index c3dd69ad57..5ad1f084d2 100644
--- a/tests/lib/chat/test_completions_streaming.py
+++ b/tests/lib/chat/test_completions_streaming.py
@@ -155,7 +155,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream
         object='chat.completion',
         service_tier=None,
         system_fingerprint='fp_845eaabc1f',
-        usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+        usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
     )
     """
     )
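
The `suffix` limit raised to 64 characters earlier in this patch applies at job
creation time. A sketch of a fine-tuning request using a longer suffix; the file
ID is a placeholder for an already-uploaded training file, and the model name is
illustrative:

    from openai import OpenAI

    client = OpenAI()

    # `suffix` may now be up to 64 characters; it is appended to the
    # resulting fine-tuned model name, e.g. ft:<model>:<org>:<suffix>:<id>.
    job = client.fine_tuning.jobs.create(
        training_file="file-abc123",  # placeholder: ID of an uploaded JSONL file
        model="gpt-4o-mini-2024-07-18",
        suffix="customer-support-assistant-long-name-test",
    )
    print(job.id, job.status)
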
From 37be8a03696ce77758f0f9e5f62ceedd982ebef9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:03:58 +0000
Subject: [PATCH 4/4] release: 1.45.0

---
 .release-please-manifest.json |  2 +-
 CHANGELOG.md                  | 14 ++++++++++++++
 pyproject.toml                |  2 +-
 src/openai/_version.py        |  2 +-
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 1ee5dee6dd..6d2723c72a 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.44.1"
+  ".": "1.45.0"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47fa2d9208..8382675b73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## 1.45.0 (2024-09-12)
+
+Full Changelog: [v1.44.1...v1.45.0](https://github.com/openai/openai-python/compare/v1.44.1...v1.45.0)
+
+### Features
+
+* **api:** add o1 models ([#1708](https://github.com/openai/openai-python/issues/1708)) ([06bd42e](https://github.com/openai/openai-python/commit/06bd42e77121a6abd4826a79ce1848812d956576))
+* **errors:** include completion in LengthFinishReasonError ([#1701](https://github.com/openai/openai-python/issues/1701)) ([b0e3256](https://github.com/openai/openai-python/commit/b0e32562aff9aceafec994d3b047f7c2a9f11524))
+
+
+### Bug Fixes
+
+* **types:** correctly mark stream discriminator as optional ([#1706](https://github.com/openai/openai-python/issues/1706)) ([80f02f9](https://github.com/openai/openai-python/commit/80f02f9e5f83fac9cd2f4172b733a92ad01399b2))
+
 ## 1.44.1 (2024-09-09)
 
 Full Changelog: [v1.44.0...v1.44.1](https://github.com/openai/openai-python/compare/v1.44.0...v1.44.1)
diff --git a/pyproject.toml b/pyproject.toml
index d4b7d2b210..178c3db355 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.44.1"
+version = "1.45.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 39c7f63e1e..0d8ebf418b 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.44.1"  # x-release-please-version
+__version__ = "1.45.0"  # x-release-please-version