diff --git a/Playground/Playground/Views/AppView.swift b/Playground/Playground/Views/AppView.swift
index 90335eb..96c6830 100644
--- a/Playground/Playground/Views/AppView.swift
+++ b/Playground/Playground/Views/AppView.swift
@@ -30,6 +30,10 @@ struct AppView: View {
             }
             
             if provider == .openai {
+                NavigationLink("Predicted Outputs") {
+                    PredictedOutputsView(provider: provider)
+                }
+                
                 NavigationLink("Response Format") {
                     ResponseFormatView(provider: provider)
                 }
diff --git a/Playground/Playground/Views/PredictedOutputsView.swift b/Playground/Playground/Views/PredictedOutputsView.swift
new file mode 100644
index 0000000..557306f
--- /dev/null
+++ b/Playground/Playground/Views/PredictedOutputsView.swift
@@ -0,0 +1,185 @@
+//
+//  PredictedOutputsView.swift
+//  Playground
+//
+//  Created by Kevin Hermawan on 11/5/24.
+//
+
+import SwiftUI
+import LLMChatOpenAI
+
+struct PredictedOutputsView: View {
+    let provider: ServiceProvider
+    
+    @Environment(AppViewModel.self) private var viewModel
+    @State private var isPreferencesPresented: Bool = false
+    
+    @State private var prompt: String = "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."
+    @State private var response: String = ""
+    @State private var acceptedPredictionTokens: Int = 0
+    @State private var rejectedPredictionTokens: Int = 0
+    @State private var inputTokens: Int = 0
+    @State private var outputTokens: Int = 0
+    @State private var totalTokens: Int = 0
+    
+    private let prediction = """
+    /// <summary>
+    /// Represents a user with a first name, last name, and username.
+    /// </summary>
+    public class User
+    {
+        /// <summary>
+        /// Gets or sets the user's first name.
+        /// </summary>
+        public string FirstName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's last name.
+        /// </summary>
+        public string LastName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's username.
+        /// </summary>
+        public string Username { get; set; }
+    }
+    """
+    
+    var body: some View {
+        @Bindable var viewModelBindable = viewModel
+        
+        VStack {
+            Form {
+                Section("Prompt") {
+                    TextField("Prompt", text: $prompt)
+                }
+                
+                Section("Prediction") {
+                    Text(prediction)
+                }
+                
+                Section("Response") {
+                    Text(response)
+                }
+                
+                Section("Prediction Tokens") {
+                    Text("Accepted Prediction Tokens")
+                        .badge(acceptedPredictionTokens.formatted())
+                    
+                    Text("Rejected Prediction Tokens")
+                        .badge(rejectedPredictionTokens.formatted())
+                }
+                
+                UsageSection(inputTokens: inputTokens, outputTokens: outputTokens, totalTokens: totalTokens)
+            }
+            
+            VStack {
+                SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
+            }
+        }
+        .toolbar {
+            ToolbarItem(placement: .principal) {
+                NavigationTitle("Predicted Outputs")
+            }
+            
+            ToolbarItem(placement: .primaryAction) {
+                Button("Preferences", systemImage: "gearshape", action: { isPreferencesPresented.toggle() })
+            }
+        }
+        .sheet(isPresented: $isPreferencesPresented) {
+            PreferencesView()
+        }
+        .onAppear {
+            viewModel.setup(for: provider)
+        }
+        .onDisappear {
+            viewModel.selectedModel = ""
+        }
+    }
+    
+    private func onSend() {
+        clear()
+        
+        let messages = [
+            ChatMessage(role: .user, content: prompt),
+            ChatMessage(role: .user, content: prediction)
+        ]
+
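+        // `Prediction` accepts either a plain string or an array of typed content
+        // parts; the array form is used here, while onStream below passes the
+        // prediction as a single string.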
+        let options = ChatOptions(
+            prediction: .init(type: .content, content: [.init(type: "text", text: prediction)]),
+            temperature: viewModel.temperature
+        )
+
+        Task {
+            do {
+                let completion = try await viewModel.chat.send(model: viewModel.selectedModel, messages: messages, options: options)
+
+                if let content = completion.choices.first?.message.content {
+                    self.response = content
+                }
+
+                if let usage = completion.usage {
+                    if let completionTokensDetails = usage.completionTokensDetails {
+                        self.acceptedPredictionTokens = completionTokensDetails.acceptedPredictionTokens
+                        self.rejectedPredictionTokens = completionTokensDetails.rejectedPredictionTokens
+                    }
+
+                    self.inputTokens = usage.promptTokens
+                    self.outputTokens = usage.completionTokens
+                    self.totalTokens = usage.totalTokens
+                }
+            } catch {
+                print(String(describing: error))
+            }
+        }
+    }
+
+    private func onStream() {
+        clear()
+
+        let messages = [
+            ChatMessage(role: .user, content: prompt),
+            ChatMessage(role: .user, content: prediction)
+        ]
+
+        let options = ChatOptions(
+            prediction: .init(type: .content, content: prediction),
+            temperature: viewModel.temperature
+        )
+
+        Task {
+            do {
+                for try await chunk in viewModel.chat.stream(model: viewModel.selectedModel, messages: messages, options: options) {
+                    if let content = chunk.choices.first?.delta.content {
+                        self.response += content
+                    }
+
+                    if let usage = chunk.usage {
+                        if let completionTokensDetails = usage.completionTokensDetails {
+                            self.acceptedPredictionTokens = completionTokensDetails.acceptedPredictionTokens
+                            self.rejectedPredictionTokens = completionTokensDetails.rejectedPredictionTokens
+                        }
+
+                        self.inputTokens = usage.promptTokens ?? 0
+                        self.outputTokens = usage.completionTokens ?? 0
+                        self.totalTokens = usage.totalTokens ?? 0
+                    }
+                }
+            } catch {
+                print(String(describing: error))
+            }
+        }
+    }
+
+    private func clear() {
+        response = ""
+        acceptedPredictionTokens = 0
+        rejectedPredictionTokens = 0
+        inputTokens = 0
+        outputTokens = 0
+        totalTokens = 0
+    }
+}
diff --git a/README.md b/README.md
index df855a9..2f70489 100644
--- a/README.md
+++ b/README.md
@@ -199,6 +199,62 @@ Task {
 
 To learn more about function calling, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/function-calling).
 
+#### Predicted Outputs
+
+```swift
+private let code = """
+/// <summary>
+/// Represents a user with a first name, last name, and username.
+/// </summary>
+public class User
+{
+    /// <summary>
+    /// Gets or sets the user's first name.
+    /// </summary>
+    public string FirstName { get; set; }
+
+    /// <summary>
+    /// Gets or sets the user's last name.
+    /// </summary>
+    public string LastName { get; set; }
+
+    /// <summary>
+    /// Gets or sets the user's username.
+    /// </summary>
+    public string Username { get; set; }
+}
+"""
+
+let messages = [
+    ChatMessage(role: .user, content: "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."),
+    ChatMessage(role: .user, content: code)
+]
+
+let options = ChatOptions(
+    prediction: .init(type: .content, content: code)
+)
+
+Task {
+    do {
+        let completion = try await chat.send(model: "gpt-4o", messages: messages, options: options)
+
+        print(completion.choices.first?.message.content ?? "")
+    } catch {
+        print(String(describing: error))
+    }
+}
+```
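+
+The prediction can also be supplied as an array of typed content parts:
+
+```swift
+let options = ChatOptions(
+    prediction: .init(type: .content, content: [.init(type: "text", text: code)])
+)
+```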
+
+To learn more about predicted outputs, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs).
+
 #### Structured Outputs
 
 ```swift
diff --git a/Sources/LLMChatOpenAI/ChatCompletion.swift b/Sources/LLMChatOpenAI/ChatCompletion.swift
index b55a3c7..efbd611 100644
--- a/Sources/LLMChatOpenAI/ChatCompletion.swift
+++ b/Sources/LLMChatOpenAI/ChatCompletion.swift
@@ -161,11 +161,20 @@ public struct ChatCompletion: Decodable, Sendable {
         public let promptTokensDetails: PromptTokensDetails?
         
         public struct CompletionTokensDetails: Decodable, Sendable {
+            /// When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
+            public let acceptedPredictionTokens: Int
+            
+            /// When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion.
+            /// However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
+            public let rejectedPredictionTokens: Int
+            
             /// Tokens generated by the model for reasoning.
             public let reasoningTokens: Int
             
             private enum CodingKeys: String, CodingKey {
+                case acceptedPredictionTokens = "accepted_prediction_tokens"
                 case reasoningTokens = "reasoning_tokens"
+                case rejectedPredictionTokens = "rejected_prediction_tokens"
             }
         }
 
diff --git a/Sources/LLMChatOpenAI/ChatCompletionChunk.swift b/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
index 598613e..6b2dbef 100644
--- a/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
+++ b/Sources/LLMChatOpenAI/ChatCompletionChunk.swift
@@ -153,11 +153,20 @@ public struct ChatCompletionChunk: Decodable, Sendable {
         public let promptTokensDetails: PromptTokensDetails?
         
         public struct CompletionTokensDetails: Decodable, Sendable {
+            /// When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
+            public let acceptedPredictionTokens: Int
+            
+            /// When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion.
+            /// However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
+            public let rejectedPredictionTokens: Int
+            
             /// Tokens generated by the model for reasoning.
             public let reasoningTokens: Int
             
             private enum CodingKeys: String, CodingKey {
+                case acceptedPredictionTokens = "accepted_prediction_tokens"
                 case reasoningTokens = "reasoning_tokens"
+                case rejectedPredictionTokens = "rejected_prediction_tokens"
             }
         }
 
diff --git a/Sources/LLMChatOpenAI/ChatOptions.swift b/Sources/LLMChatOpenAI/ChatOptions.swift
index ea8a580..b01de11 100644
--- a/Sources/LLMChatOpenAI/ChatOptions.swift
+++ b/Sources/LLMChatOpenAI/ChatOptions.swift
@@ -33,6 +33,10 @@ public struct ChatOptions: Encodable, Sendable {
     /// Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as 1 to minimize costs.
     public let n: Int?
     
+    /// Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs), which can greatly improve response times when large parts of the model response are known ahead of time.
+    /// This is most common when you are regenerating a file with only minor changes to most of the content.
+    public let prediction: Prediction?
+    
     /// Number between -2.0 and 2.0.
     /// Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
     public let presencePenalty: Double?
@@ -87,6 +91,7 @@ public struct ChatOptions: Encodable, Sendable {
         topLogprobs: Int? = nil,
         maxCompletionTokens: Int? = nil,
         n: Int? = nil,
+        prediction: Prediction? = nil,
         presencePenalty: Double? = nil,
         responseFormat: ResponseFormat? = nil,
         seed: Int? = nil,
@@ -105,6 +110,7 @@ public struct ChatOptions: Encodable, Sendable {
         self.topLogprobs = topLogprobs
         self.maxCompletionTokens = maxCompletionTokens
         self.n = n
+        self.prediction = prediction
         self.presencePenalty = presencePenalty
         self.responseFormat = responseFormat
         self.seed = seed
@@ -281,6 +287,7 @@ public struct ChatOptions: Encodable, Sendable {
         try container.encodeIfPresent(topLogprobs, forKey: .topLogprobs)
         try container.encodeIfPresent(maxCompletionTokens, forKey: .maxCompletionTokens)
         try container.encodeIfPresent(n, forKey: .n)
+        try container.encodeIfPresent(prediction, forKey: .prediction)
         try container.encodeIfPresent(presencePenalty, forKey: .presencePenalty)
         try container.encodeIfPresent(responseFormat, forKey: .responseFormat)
         try container.encodeIfPresent(seed, forKey: .seed)
@@ -301,6 +308,7 @@ public struct ChatOptions: Encodable, Sendable {
         case topLogprobs = "top_logprobs"
         case maxCompletionTokens = "max_completion_tokens"
         case n
+        case prediction
         case presencePenalty = "presence_penalty"
         case responseFormat = "response_format"
         case seed
@@ -314,3 +322,72 @@ public struct ChatOptions: Encodable, Sendable {
         case user
     }
 }
+
+// MARK: - Prediction
+extension ChatOptions {
+    public struct Prediction: Encodable, Sendable {
+        /// The type of the predicted content you want to provide. This type is currently always `.content`.
+        public let type: PredictionType
+        
+        private let content: String?
+        private let contents: [Content]?
+        
+        /// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
+        public struct Content: Encodable, Sendable {
+            /// The type of the content part.
+            public let type: String
+            
+            /// The text content.
+            public let text: String
+            
+            /// Initializes a new instance of ``Content``.
+            public init(type: String, text: String) {
+                self.type = type
+                self.text = text
+            }
+        }
+        
+        /// The type of the predicted content you want to provide.
+        public enum PredictionType: String, Encodable, Sendable {
+            case content
+        }
+        
+        /// Initializes a new instance of ``Prediction`` with a single content string.
+        /// - Parameters:
+        ///   - type: The type of the predicted content you want to provide. This type is currently always `.content`.
+        ///   - content: The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
+        public init(type: PredictionType, content: String) {
+            self.type = type
+            self.content = content
+            self.contents = nil
+        }
+        
+        /// Initializes a new instance of ``Prediction`` with an array of typed content parts.
+        /// - Parameters:
+        ///   - type: The type of the predicted content you want to provide. This type is currently always `.content`.
+        ///   - content: An array of content parts with a defined type. Supported options differ based on the model being used to generate the response. Can contain text inputs.
+        public init(type: PredictionType, content: [Content]) {
+            self.type = type
+            self.content = nil
+            self.contents = content
+        }
+
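+        // Both the string form and the array form are encoded under the single
+        // `content` key that the API expects.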
+        public func encode(to encoder: Encoder) throws {
+            var container = encoder.container(keyedBy: CodingKeys.self)
+            try container.encode(type, forKey: .type)
+            
+            if let content {
+                try container.encode(content, forKey: .content)
+            } else if let contents {
+                try container.encode(contents, forKey: .content)
+            }
+        }
+        
+        private enum CodingKeys: String, CodingKey {
+            case type
+            case content
+        }
+    }
+}
diff --git a/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md b/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
index 5c38604..f0a6370 100644
--- a/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
+++ b/Sources/LLMChatOpenAI/Documentation.docc/Documentation.md
@@ -170,6 +170,62 @@ Task {
 
 To learn more about function calling, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/function-calling).
 
+#### Predicted Outputs
+
+```swift
+let code = """
+/// <summary>
+/// Represents a user with a first name, last name, and username.
+/// </summary>
+public class User
+{
+    /// <summary>
+    /// Gets or sets the user's first name.
+    /// </summary>
+    public string FirstName { get; set; }
+
+    /// <summary>
+    /// Gets or sets the user's last name.
+    /// </summary>
+    public string LastName { get; set; }
+
+    /// <summary>
+    /// Gets or sets the user's username.
+    /// </summary>
+    public string Username { get; set; }
+}
+"""
+
+let messages = [
+    ChatMessage(role: .user, content: "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."),
+    ChatMessage(role: .user, content: code)
+]
+
+let options = ChatOptions(
+    prediction: .init(type: .content, content: code)
+)
+
+Task {
+    do {
+        let completion = try await chat.send(model: "gpt-4o", messages: messages, options: options)
+
+        print(completion.choices.first?.message.content ?? "")
+    } catch {
+        print(String(describing: error))
+    }
+}
+```
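+
+The prediction can also be supplied as an array of typed content parts:
+
+```swift
+let options = ChatOptions(
+    prediction: .init(type: .content, content: [.init(type: "text", text: code)])
+)
+```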
+
+To learn more about predicted outputs, check out the [OpenAI documentation](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs).
+
 #### Structured Outputs
 
 ```swift