diff --git a/Playground/Playground/Views/AppView.swift b/Playground/Playground/Views/AppView.swift
index 90335eb..96c6830 100644
--- a/Playground/Playground/Views/AppView.swift
+++ b/Playground/Playground/Views/AppView.swift
@@ -30,6 +30,10 @@ struct AppView: View {
}
if provider == .openai {
+ NavigationLink("Predicted Outputs") {
+ PredictedOutputsView(provider: provider)
+ }
+
NavigationLink("Response Format") {
ResponseFormatView(provider: provider)
}
diff --git a/Playground/Playground/Views/PredictedOutputsView.swift b/Playground/Playground/Views/PredictedOutputsView.swift
new file mode 100644
index 0000000..557306f
--- /dev/null
+++ b/Playground/Playground/Views/PredictedOutputsView.swift
@@ -0,0 +1,182 @@
+//
+// PredictedOutputsView.swift
+// Playground
+//
+// Created by Kevin Hermawan on 11/5/24.
+//
+
+import SwiftUI
+import LLMChatOpenAI
+
+/// Playground screen that demonstrates Predicted Outputs.
+///
+/// Sends the editable prompt together with a fixed C# snippet, passes that same snippet as the
+/// prediction, and displays the response, the accepted/rejected prediction token counts, and usage.
+struct PredictedOutputsView: View {
+    let provider: ServiceProvider
+
+    @Environment(AppViewModel.self) private var viewModel
+    @State private var isPreferencesPresented: Bool = false
+
+    @State private var prompt: String = "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."
+    @State private var response: String = ""
+    @State private var acceptedPredictionTokens: Int = 0
+    @State private var rejectedPredictionTokens: Int = 0
+    @State private var inputTokens: Int = 0
+    @State private var outputTokens: Int = 0
+    @State private var totalTokens: Int = 0
+
+    /// Handle to the request currently in flight, kept so that a newer request or leaving the
+    /// screen can cancel it instead of letting it keep writing into the displayed state.
+    @State private var requestTask: Task<Void, Never>? = nil
+
+    /// Source text that is sent both as a user message and as the prediction.
+    private let prediction = """
+    ///
+    /// Represents a user with a first name, last name, and username.
+    ///
+    public class User
+    {
+        ///
+        /// Gets or sets the user's first name.
+        ///
+        public string FirstName { get; set; }
+
+        ///
+        /// Gets or sets the user's last name.
+        ///
+        public string LastName { get; set; }
+
+        ///
+        /// Gets or sets the user's username.
+        ///
+        public string Username { get; set; }
+    }
+    """
+
+    /// Messages sent with every request: the prompt followed by the text being edited.
+    private var requestMessages: [ChatMessage] {
+        [
+            ChatMessage(role: .user, content: prompt),
+            ChatMessage(role: .user, content: prediction)
+        ]
+    }
+
+    var body: some View {
+        VStack {
+            Form {
+                Section("Prompt") {
+                    TextField("Prompt", text: $prompt)
+                }
+
+                Section("Prediction") {
+                    Text(prediction)
+                }
+
+                Section("Response") {
+                    Text(response)
+                }
+
+                Section("Prediction Section") {
+                    Text("Accepted Prediction Tokens")
+                        .badge(acceptedPredictionTokens.formatted())
+
+                    Text("Rejected Prediction Tokens")
+                        .badge(rejectedPredictionTokens.formatted())
+                }
+
+                UsageSection(inputTokens: inputTokens, outputTokens: outputTokens, totalTokens: totalTokens)
+            }
+
+            VStack {
+                SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
+            }
+        }
+        .toolbar {
+            ToolbarItem(placement: .principal) {
+                NavigationTitle("Predicted Outputs")
+            }
+
+            ToolbarItem(placement: .primaryAction) {
+                Button("Preferences", systemImage: "gearshape", action: { isPreferencesPresented.toggle() })
+            }
+        }
+        .sheet(isPresented: $isPreferencesPresented) {
+            PreferencesView()
+        }
+        .onAppear {
+            viewModel.setup(for: provider)
+        }
+        .onDisappear {
+            // Stop any request that is still running; its result has nowhere to be shown.
+            requestTask?.cancel()
+            viewModel.selectedModel = ""
+        }
+    }
+
+    /// Sends a single non-streaming request and shows its response and token usage.
+    ///
+    /// The prediction is passed through the content-parts initializer.
+    private func onSend() {
+        clear()
+
+        let messages = requestMessages
+
+        let options = ChatOptions(
+            prediction: .init(type: .content, content: [.init(type: "text", text: prediction)]),
+            temperature: viewModel.temperature
+        )
+
+        // Only one request may write into the view state at a time.
+        requestTask?.cancel()
+        requestTask = Task {
+            do {
+                let completion = try await viewModel.chat.send(model: viewModel.selectedModel, messages: messages, options: options)
+
+                // A newer request (or leaving the screen) cancelled this one: drop the late result.
+                guard !Task.isCancelled else { return }
+
+                if let content = completion.choices.first?.message.content {
+                    response = content
+                }
+
+                if let usage = completion.usage {
+                    if let completionTokensDetails = usage.completionTokensDetails {
+                        acceptedPredictionTokens = completionTokensDetails.acceptedPredictionTokens
+                        rejectedPredictionTokens = completionTokensDetails.rejectedPredictionTokens
+                    }
+
+                    inputTokens = usage.promptTokens
+                    outputTokens = usage.completionTokens
+                    totalTokens = usage.totalTokens
+                }
+            } catch {
+                // Failures caused by our own cancellation are expected and not worth logging.
+                guard !Task.isCancelled else { return }
+                print(String(describing: error))
+            }
+        }
+    }
+
+    /// Sends a streaming request, appending each content delta to the response as it arrives.
+    ///
+    /// The prediction is passed through the plain-string initializer.
+    private func onStream() {
+        clear()
+
+        let messages = requestMessages
+
+        let options = ChatOptions(
+            prediction: .init(type: .content, content: prediction),
+            temperature: viewModel.temperature
+        )
+
+        // Only one stream may append to `response` at a time; otherwise chunks interleave.
+        requestTask?.cancel()
+        requestTask = Task {
+            do {
+                for try await chunk in viewModel.chat.stream(model: viewModel.selectedModel, messages: messages, options: options) {
+                    // Stop consuming as soon as this request has been superseded.
+                    guard !Task.isCancelled else { break }
+
+                    if let content = chunk.choices.first?.delta.content {
+                        response += content
+                    }
+
+                    if let usage = chunk.usage {
+                        if let completionTokensDetails = usage.completionTokensDetails {
+                            acceptedPredictionTokens = completionTokensDetails.acceptedPredictionTokens
+                            rejectedPredictionTokens = completionTokensDetails.rejectedPredictionTokens
+                        }
+
+                        inputTokens = usage.promptTokens ?? 0
+                        outputTokens = usage.completionTokens ?? 0
+                        totalTokens = usage.totalTokens ?? 0
+                    }
+                }
+            } catch {
+                // Failures caused by our own cancellation are expected and not worth logging.
+                guard !Task.isCancelled else { return }
+                print(String(describing: error))
+            }
+        }
+    }
+
+    /// Resets the response text and every token counter before a new request starts.
+    private func clear() {
+        response = ""
+        acceptedPredictionTokens = 0
+        rejectedPredictionTokens = 0
+        inputTokens = 0
+        outputTokens = 0
+        totalTokens = 0
+    }
+}
diff --git a/README.md b/README.md
index df855a9..2f70489 100644
--- a/README.md
+++ b/README.md
@@ -199,6 +199,54 @@ Task {
To learn more about function calling, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/function-calling).
+#### Predicted Outputs
+
+```swift
+let code = """
+///
+/// Represents a user with a first name, last name, and username.
+///
+public class User
+{
+ ///
+ /// Gets or sets the user's first name.
+ ///
+ public string FirstName { get; set; }
+
+ ///
+ /// Gets or sets the user's last name.
+ ///
+ public string LastName { get; set; }
+
+ ///
+ /// Gets or sets the user's username.
+ ///
+ public string Username { get; set; }
+}
+"""
+
+let messages = [
+ ChatMessage(role: .user, content: "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."),
+ ChatMessage(role: .user, content: code)
+]
+
+let options = ChatOptions(
+ prediction: .init(type: .content, content: code)
+)
+
+Task {
+ do {
+ let completion = try await chat.send(model: "gpt-4o", messages: messages, options: options)
+
+ print(completion.choices.first?.message.content ?? "")
+ } catch {
+ print(String(describing: error))
+ }
+}
+```
+
+To learn more about predicted outputs, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs).
+
#### Structured Outputs
```swift
diff --git a/Sources/LLMChatOpenAI/ChatCompletion.swift b/Sources/LLMChatOpenAI/ChatCompletion.swift
index b55a3c7..efbd611 100644
--- a/Sources/LLMChatOpenAI/ChatCompletion.swift
+++ b/Sources/LLMChatOpenAI/ChatCompletion.swift
@@ -161,11 +161,20 @@ public struct ChatCompletion: Decodable, Sendable {
public let promptTokensDetails: PromptTokensDetails?
public struct CompletionTokensDetails: Decodable, Sendable {
+    /// When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
+    /// Decoded as `0` when the response does not include this field.
+    public let acceptedPredictionTokens: Int
+
+    /// When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion.
+    /// However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
+    /// Decoded as `0` when the response does not include this field.
+    public let rejectedPredictionTokens: Int
+
    /// Tokens generated by the model for reasoning.
    public let reasoningTokens: Int
+
+    /// Decodes the token breakdown.
+    ///
+    /// The prediction counters are read with `decodeIfPresent` and fall back to `0`, so a payload
+    /// that lacks them still decodes instead of failing the whole completion. `reasoning_tokens`
+    /// remains required.
+    /// - Throws: `DecodingError` when `reasoning_tokens` is missing or any present value is not an integer.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+
+        acceptedPredictionTokens = try container.decodeIfPresent(Int.self, forKey: .acceptedPredictionTokens) ?? 0
+        rejectedPredictionTokens = try container.decodeIfPresent(Int.self, forKey: .rejectedPredictionTokens) ?? 0
+        reasoningTokens = try container.decode(Int.self, forKey: .reasoningTokens)
+    }
+
    private enum CodingKeys: String, CodingKey {
+        case acceptedPredictionTokens = "accepted_prediction_tokens"
        case reasoningTokens = "reasoning_tokens"
+        case rejectedPredictionTokens = "rejected_prediction_tokens"
    }
}
diff --git a/Sources/LLMChatOpenAI/ChatCompletionChunk.swift b/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
index 598613e..6b2dbef 100644
--- a/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
+++ b/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
@@ -153,11 +153,20 @@ public struct ChatCompletionChunk: Decodable, Sendable {
public let promptTokensDetails: PromptTokensDetails?
public struct CompletionTokensDetails: Decodable, Sendable {
+    /// When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
+    /// Decoded as `0` when the chunk does not include this field.
+    public let acceptedPredictionTokens: Int
+
+    /// When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion.
+    /// However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
+    /// Decoded as `0` when the chunk does not include this field.
+    public let rejectedPredictionTokens: Int
+
    /// Tokens generated by the model for reasoning.
    public let reasoningTokens: Int
+
+    /// Decodes the token breakdown of a streamed chunk.
+    ///
+    /// The prediction counters are read with `decodeIfPresent` and fall back to `0`, so a chunk
+    /// that lacks them still decodes instead of terminating the stream with a decoding error.
+    /// `reasoning_tokens` remains required.
+    /// - Throws: `DecodingError` when `reasoning_tokens` is missing or any present value is not an integer.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+
+        acceptedPredictionTokens = try container.decodeIfPresent(Int.self, forKey: .acceptedPredictionTokens) ?? 0
+        rejectedPredictionTokens = try container.decodeIfPresent(Int.self, forKey: .rejectedPredictionTokens) ?? 0
+        reasoningTokens = try container.decode(Int.self, forKey: .reasoningTokens)
+    }
+
    private enum CodingKeys: String, CodingKey {
+        case acceptedPredictionTokens = "accepted_prediction_tokens"
        case reasoningTokens = "reasoning_tokens"
+        case rejectedPredictionTokens = "rejected_prediction_tokens"
    }
}
diff --git a/Sources/LLMChatOpenAI/ChatOptions.swift b/Sources/LLMChatOpenAI/ChatOptions.swift
index ea8a580..b01de11 100644
--- a/Sources/LLMChatOpenAI/ChatOptions.swift
+++ b/Sources/LLMChatOpenAI/ChatOptions.swift
@@ -33,6 +33,10 @@ public struct ChatOptions: Encodable, Sendable {
/// Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as 1 to minimize costs.
public let n: Int?
+ /// Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs), which can greatly improve response times when large parts of the model response are known ahead of time.
+ /// This is most common when you are regenerating a file with only minor changes to most of the content.
+ public let prediction: Prediction?
+
/// Number between -2.0 and 2.0.
/// Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
public let presencePenalty: Double?
@@ -87,6 +91,7 @@ public struct ChatOptions: Encodable, Sendable {
topLogprobs: Int? = nil,
maxCompletionTokens: Int? = nil,
n: Int? = nil,
+ prediction: Prediction? = nil,
presencePenalty: Double? = nil,
responseFormat: ResponseFormat? = nil,
seed: Int? = nil,
@@ -105,6 +110,7 @@ public struct ChatOptions: Encodable, Sendable {
self.topLogprobs = topLogprobs
self.maxCompletionTokens = maxCompletionTokens
self.n = n
+ self.prediction = prediction
self.presencePenalty = presencePenalty
self.responseFormat = responseFormat
self.seed = seed
@@ -281,6 +287,7 @@ public struct ChatOptions: Encodable, Sendable {
try container.encodeIfPresent(topLogprobs, forKey: .topLogprobs)
try container.encodeIfPresent(maxCompletionTokens, forKey: .maxCompletionTokens)
try container.encodeIfPresent(n, forKey: .n)
+ try container.encodeIfPresent(prediction, forKey: .prediction)
try container.encodeIfPresent(presencePenalty, forKey: .presencePenalty)
try container.encodeIfPresent(responseFormat, forKey: .responseFormat)
try container.encodeIfPresent(seed, forKey: .seed)
@@ -301,6 +308,7 @@ public struct ChatOptions: Encodable, Sendable {
case topLogprobs = "top_logprobs"
case maxCompletionTokens = "max_completion_tokens"
case n
+ case prediction
case presencePenalty = "presence_penalty"
case responseFormat = "response_format"
case seed
@@ -314,3 +322,70 @@ public struct ChatOptions: Encodable, Sendable {
case user
}
}
+
+// MARK: - Prediction
+extension ChatOptions {
+    /// Configuration for a Predicted Output: text that the model response is expected to largely match.
+    ///
+    /// Encodes as an object with a `type` key and a `content` key. `content` is either a plain
+    /// string or an array of ``Content`` parts, depending on which initializer created the value.
+    public struct Prediction: Encodable, Sendable {
+        /// The type of the predicted content you want to provide. This type is currently always `.content`.
+        public let type: PredictionType
+
+        /// The predicted content, in whichever of the two supported shapes the caller supplied.
+        private let payload: Payload
+
+        /// The mutually exclusive shapes the encoded `content` key can take.
+        /// Using an enum keeps the "both set" and "neither set" states unrepresentable.
+        private enum Payload: Sendable {
+            case text(String)
+            case parts([Content])
+        }
+
+        /// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
+        public struct Content: Encodable, Sendable {
+            /// The type of the content part.
+            public let type: String
+
+            /// The text content.
+            public let text: String
+
+            /// Initializes a new instance of ``Content``.
+            /// - Parameters:
+            ///   - type: The type of the content part. Defaults to `"text"`.
+            ///   - text: The text content.
+            public init(type: String = "text", text: String) {
+                self.type = type
+                self.text = text
+            }
+        }
+
+        /// The type of the predicted content you want to provide.
+        public enum PredictionType: String, Encodable, Sendable {
+            case content
+        }
+
+        /// Initializes a new instance of ``Prediction`` with a single content string.
+        /// - Parameters:
+        ///   - type: The type of the predicted content you want to provide. Defaults to `.content`, currently the only case.
+        ///   - content: The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
+        public init(type: PredictionType = .content, content: String) {
+            self.type = type
+            self.payload = .text(content)
+        }
+
+        /// Initializes a new instance of ``Prediction`` with an array of content parts with a defined type.
+        /// - Parameters:
+        ///   - type: The type of the predicted content you want to provide. Defaults to `.content`, currently the only case.
+        ///   - content: An array of content parts with a defined type. Supported options differ based on the model being used to generate the response. Can contain text inputs.
+        public init(type: PredictionType = .content, content: [Content]) {
+            self.type = type
+            self.payload = .parts(content)
+        }
+
+        /// Encodes `type` and then the payload under the single `content` key,
+        /// as a string or as an array of parts.
+        public func encode(to encoder: Encoder) throws {
+            var container = encoder.container(keyedBy: CodingKeys.self)
+            try container.encode(type, forKey: .type)
+
+            // Exhaustive switch: a `content` value is always written.
+            switch payload {
+            case .text(let text):
+                try container.encode(text, forKey: .content)
+            case .parts(let parts):
+                try container.encode(parts, forKey: .content)
+            }
+        }
+
+        private enum CodingKeys: String, CodingKey {
+            case type
+            case content
+        }
+    }
+}
diff --git a/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md b/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
index 5c38604..f0a6370 100644
--- a/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
+++ b/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
@@ -170,6 +170,54 @@ Task {
To learn more about function calling, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/function-calling).
+#### Predicted Outputs
+
+```swift
+let code = """
+///
+/// Represents a user with a first name, last name, and username.
+///
+public class User
+{
+ ///
+ /// Gets or sets the user's first name.
+ ///
+ public string FirstName { get; set; }
+
+ ///
+ /// Gets or sets the user's last name.
+ ///
+ public string LastName { get; set; }
+
+ ///
+ /// Gets or sets the user's username.
+ ///
+ public string Username { get; set; }
+}
+"""
+
+let messages = [
+ ChatMessage(role: .user, content: "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."),
+ ChatMessage(role: .user, content: code)
+]
+
+let options = ChatOptions(
+ prediction: .init(type: .content, content: code)
+)
+
+Task {
+ do {
+ let completion = try await chat.send(model: "gpt-4o", messages: messages, options: options)
+
+ print(completion.choices.first?.message.content ?? "")
+ } catch {
+ print(String(describing: error))
+ }
+}
+```
+
+To learn more about predicted outputs, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs).
+
#### Structured Outputs
```swift