diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java
index 2dc34bea254..69590bcad4f 100644
--- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java
+++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java
@@ -720,6 +720,11 @@ private ChatCompletionsOptions merge(ChatCompletionsOptions fromAzureOptions,
 		mergedAzureOptions.setMaxTokens((fromAzureOptions.getMaxTokens() != null) ? fromAzureOptions.getMaxTokens()
 				: toSpringAiOptions.getMaxTokens());
 
+		if (fromAzureOptions.getMaxCompletionTokens() != null || toSpringAiOptions.getMaxCompletionTokens() != null) {
+			mergedAzureOptions.setMaxCompletionTokens((fromAzureOptions.getMaxCompletionTokens() != null)
+					? fromAzureOptions.getMaxCompletionTokens() : toSpringAiOptions.getMaxCompletionTokens());
+		}
+
 		mergedAzureOptions.setLogitBias(fromAzureOptions.getLogitBias() != null ? fromAzureOptions.getLogitBias()
 				: toSpringAiOptions.getLogitBias());
 
@@ -803,6 +808,10 @@ private ChatCompletionsOptions merge(AzureOpenAiChatOptions fromSpringAiOptions,
 			mergedAzureOptions.setMaxTokens(fromSpringAiOptions.getMaxTokens());
 		}
 
+		if (fromSpringAiOptions.getMaxCompletionTokens() != null) {
+			mergedAzureOptions.setMaxCompletionTokens(fromSpringAiOptions.getMaxCompletionTokens());
+		}
+
 		if (fromSpringAiOptions.getLogitBias() != null) {
 			mergedAzureOptions.setLogitBias(fromSpringAiOptions.getLogitBias());
 		}
@@ -894,6 +903,9 @@ private ChatCompletionsOptions copy(ChatCompletionsOptions fromOptions) {
 		if (fromOptions.getMaxTokens() != null) {
 			copyOptions.setMaxTokens(fromOptions.getMaxTokens());
 		}
+		if (fromOptions.getMaxCompletionTokens() != null) {
+			copyOptions.setMaxCompletionTokens(fromOptions.getMaxCompletionTokens());
+		}
 		if (fromOptions.getLogitBias() != null) {
 			copyOptions.setLogitBias(fromOptions.getLogitBias());
 		}
diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java
index da442b4ad4d..bd11a5a868a 100644
--- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java
+++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023-2024 the original author or authors.
+ * Copyright 2023-2025 the original author or authors.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -167,6 +167,13 @@ public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
 	@JsonProperty("top_log_probs")
 	private Integer topLogProbs;
 
+	/*
+	 * An upper bound for the number of tokens that can be generated for a completion,
+	 * including visible output tokens and reasoning tokens.
+	 */
+	@JsonProperty("max_completion_tokens")
+	private Integer maxCompletionTokens;
+
 	/*
	 * If provided, the configuration options for available Azure OpenAI chat
	 * enhancements.
@@ -266,6 +273,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
 			.frequencyPenalty(fromOptions.getFrequencyPenalty() != null ? fromOptions.getFrequencyPenalty() : null)
 			.logitBias(fromOptions.getLogitBias())
 			.maxTokens(fromOptions.getMaxTokens())
+			.maxCompletionTokens(fromOptions.getMaxCompletionTokens())
 			.N(fromOptions.getN())
 			.presencePenalty(fromOptions.getPresencePenalty() != null ? fromOptions.getPresencePenalty() : null)
 			.stop(fromOptions.getStop() != null ? new ArrayList<>(fromOptions.getStop()) : null)
@@ -300,6 +308,14 @@ public void setMaxTokens(Integer maxTokens) {
 		this.maxTokens = maxTokens;
 	}
 
+	public Integer getMaxCompletionTokens() {
+		return this.maxCompletionTokens;
+	}
+
+	public void setMaxCompletionTokens(Integer maxCompletionTokens) {
+		this.maxCompletionTokens = maxCompletionTokens;
+	}
+
 	public Map<String, Integer> getLogitBias() {
 		return this.logitBias;
 	}
@@ -510,6 +526,7 @@ public boolean equals(Object o) {
 				&& Objects.equals(this.enableStreamUsage, that.enableStreamUsage)
 				&& Objects.equals(this.reasoningEffort, that.reasoningEffort)
 				&& Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.maxTokens, that.maxTokens)
+				&& Objects.equals(this.maxCompletionTokens, that.maxCompletionTokens)
 				&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
 				&& Objects.equals(this.presencePenalty, that.presencePenalty)
 				&& Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topP, that.topP);
@@ -520,8 +537,8 @@ public int hashCode() {
 		return Objects.hash(this.logitBias, this.user, this.n, this.stop, this.deploymentName, this.responseFormat,
 				this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, this.seed, this.logprobs,
 				this.topLogProbs, this.enhancements, this.streamOptions, this.reasoningEffort, this.enableStreamUsage,
-				this.toolContext, this.maxTokens, this.frequencyPenalty, this.presencePenalty, this.temperature,
-				this.topP);
+				this.toolContext, this.maxTokens, this.maxCompletionTokens, this.frequencyPenalty, this.presencePenalty,
+				this.temperature, this.topP);
 	}
 
 	public static class Builder {
@@ -556,6 +573,11 @@ public Builder maxTokens(Integer maxTokens) {
 			return this;
 		}
 
+		public Builder maxCompletionTokens(Integer maxCompletionTokens) {
+			this.options.maxCompletionTokens = maxCompletionTokens;
+			return this;
+		}
+
 		public Builder N(Integer n) {
 			this.options.n = n;
 			return this;
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc
index b6576d85684..6dfc04fec2f 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc
@@ -113,12 +113,12 @@ This is because in OpenAI there is no `Deployment Name`, only a `Model Name`.
 
 NOTE: The property `spring.ai.azure.openai.chat.options.model` has been renamed to `spring.ai.azure.openai.chat.options.deployment-name`.
 
-NOTE: If you decide to connect to `OpenAI` instead of `Azure OpenAI`, by setting the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` property, 
+NOTE: If you decide to connect to `OpenAI` instead of `Azure OpenAI`, by setting the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` property,
 then the `spring.ai.azure.openai.chat.options.deployment-name` is treated as an link:https://platform.openai.com/docs/models[OpenAI model] name.
 
 ==== Access the OpenAI Model
 
-You can configure the client to use directly `OpenAI` instead of the `Azure OpenAI` deployed models. 
+You can configure the client to use `OpenAI` directly instead of the `Azure OpenAI` deployed models.
 For this you need to set the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` instead of `spring.ai.azure.openai.api-key=<Your Azure OpenAI Key>`.
 
 === Add Repositories and BOM
@@ -197,8 +197,8 @@ The prefix `spring.ai.azure.openai` is the property prefix to configure the conn
 
 | spring.ai.azure.openai.api-key | The Key from Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
 | spring.ai.azure.openai.endpoint | The endpoint from the Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
-| spring.ai.azure.openai.openai-api-key | (non Azure) OpenAI API key. Used to authenticate with the OpenAI service, instead of Azure OpenAI. 
-This automatically sets the endpoint to https://api.openai.com/v1. Use either `api-key` or `openai-api-key` property. 
+| spring.ai.azure.openai.openai-api-key | (non Azure) OpenAI API key. Used to authenticate with the OpenAI service instead of Azure OpenAI.
+This automatically sets the endpoint to https://api.openai.com/v1. Use either the `api-key` or the `openai-api-key` property.
 With this configuration the `spring.ai.azure.openai.chat.options.deployment-name` is treated as an https://platform.openai.com/docs/models[OpenAi Model] name.| -
 | spring.ai.azure.openai.custom-headers | A map of custom headers to be included in the API requests. Each entry in the map represents a header, where the key is the header name and the value is the header value. | Empty map
 |====
@@ -223,11 +223,12 @@ The prefix `spring.ai.azure.openai.chat` is the property prefix that configures
 | spring.ai.azure.openai.chat.enabled (Removed and no longer valid) | Enable Azure OpenAI chat model. | true
 | spring.ai.model.chat | Enable Azure OpenAI chat model. | azure-openai
 | spring.ai.azure.openai.chat.options.deployment-name | In use with Azure, this refers to the "Deployment Name" of your model, which you can find at https://oai.azure.com/portal.
-It's important to note that within an Azure OpenAI deployment, the "Deployment Name" is distinct from the model itself. 
-The confusion around these terms stems from the intention to make the Azure OpenAI client library compatible with the original OpenAI endpoint. 
+It's important to note that within an Azure OpenAI deployment, the "Deployment Name" is distinct from the model itself.
+The confusion around these terms stems from the intention to make the Azure OpenAI client library compatible with the original OpenAI endpoint.
 The deployment structures offered by Azure OpenAI and Sam Altman's OpenAI differ significantly.
 Deployments model name to provide as part of this completions request. | gpt-4o
-| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate. | -
+| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. | -
+| spring.ai.azure.openai.chat.options.maxCompletionTokens | An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. | -
 | spring.ai.azure.openai.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.7
 | spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
 | spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
@@ -409,9 +410,9 @@ var openAIClientBuilder = new OpenAIClientBuilder()
 	.endpoint(System.getenv("AZURE_OPENAI_ENDPOINT"));
 
 var openAIChatOptions = AzureOpenAiChatOptions.builder()
-	.deploymentName("gpt-4o")
+	.deploymentName("gpt-5")
 	.temperature(0.4)
-	.maxTokens(200)
+	.maxCompletionTokens(200)
 	.build();
 
 var chatModel = AzureOpenAiChatModel.builder()
@@ -429,4 +430,3 @@ Flux<ChatResponse> streamingResponses = chatModel.stream(
 ----
 
-NOTE: the `gpt-4o` is actually the `Deployment Name` as presented in the Azure AI Portal.
-
+NOTE: the `gpt-5` is actually the `Deployment Name` as presented in the Azure AI Portal.
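A usage sketch follows (it is not part of the patch itself). It shows how the new `maxCompletionTokens` option flows from default options through a per-call override. The builder methods and the merge behavior come from the diff above; the environment variables and `gpt-5` deployment name mirror the documentation example, while the prompt text and the 200/100 token caps are illustrative assumptions.

import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.core.credential.AzureKeyCredential;

import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.azure.openai.AzureOpenAiChatOptions;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;

public class MaxCompletionTokensSketch {

	public static void main(String[] args) {
		// Client wiring as in the documentation example above.
		var openAIClientBuilder = new OpenAIClientBuilder()
			.credential(new AzureKeyCredential(System.getenv("AZURE_OPENAI_API_KEY")))
			.endpoint(System.getenv("AZURE_OPENAI_ENDPOINT"));

		// Default options: cap visible output plus reasoning tokens at 200
		// (illustrative value) via the new builder method.
		var defaultOptions = AzureOpenAiChatOptions.builder()
			.deploymentName("gpt-5")
			.maxCompletionTokens(200)
			.build();

		var chatModel = AzureOpenAiChatModel.builder()
			.openAIClientBuilder(openAIClientBuilder)
			.defaultOptions(defaultOptions)
			.build();

		// Per-call options are intended to override the default: the merge()
		// changes above prefer the runtime value when both sides set one.
		ChatResponse response = chatModel.call(new Prompt(
				"Summarize the Apache License 2.0 in one sentence.", // illustrative prompt
				AzureOpenAiChatOptions.builder().maxCompletionTokens(100).build()));

		System.out.println(response.getResult().getOutput().getText());
	}

}

Note that `max_completion_tokens` is the cap reasoning deployments expect in place of `max_tokens`, which is why the docs example switches from `.maxTokens(200)` to `.maxCompletionTokens(200)` alongside the `gpt-5` deployment name.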