11// Copyright (c) Microsoft. All rights reserved.
22
3+ using System ;
34using System . Collections . Generic ;
45using System . Diagnostics . CodeAnalysis ;
56using System . Net . Http ;
1213using Microsoft . KernelMemory . Diagnostics ;
1314using Microsoft . SemanticKernel ;
1415using Microsoft . SemanticKernel . Connectors . AzureOpenAI ;
16+ using OpenAI . Chat ;
1517
1618namespace Microsoft . KernelMemory . AI . AzureOpenAI ;
1719
@@ -28,6 +30,8 @@ public sealed class AzureOpenAITextGenerator : ITextGenerator
2830 private readonly ITextTokenizer _textTokenizer ;
2931 private readonly ILogger < AzureOpenAITextGenerator > _log ;
3032
33+ private readonly string _deployment ;
34+
3135 /// <inheritdoc/>
3236 public int MaxTokenTotal { get ; }
3337
@@ -87,6 +91,7 @@ public AzureOpenAITextGenerator(
8791 {
8892 this . _client = skClient ;
8993 this . _log = ( loggerFactory ?? DefaultLogger . Factory ) . CreateLogger < AzureOpenAITextGenerator > ( ) ;
94+ this . _deployment = config . Deployment ;
9095 this . MaxTokenTotal = config . MaxTokenTotal ;
9196
9297 textTokenizer ??= TokenizerFactory . GetTokenizerForEncoding ( config . Tokenizer ) ;
@@ -114,7 +119,7 @@ public IReadOnlyList<string> GetTokens(string text)
114119 }
115120
116121 /// <inheritdoc/>
117- public async IAsyncEnumerable < string > GenerateTextAsync (
122+ public async IAsyncEnumerable < GeneratedTextContent > GenerateTextAsync (
118123 string prompt ,
119124 TextGenerationOptions options ,
120125 [ EnumeratorCancellation ] CancellationToken cancellationToken = default )
@@ -153,9 +158,33 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
153158
await foreach (StreamingTextContent x in result.WithCancellation(cancellationToken))
{
    TokenUsage? tokenUsage = null;

    // The last message includes token usage metadata.
    // https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options
    //
    // The metadata is read with TryGetValue rather than the indexer: the indexer throws
    // KeyNotFoundException when a key is absent, which would abort the whole stream
    // if a chunk is emitted without a "Usage" (or "CreatedAt") entry.
    if (x.Metadata is { } metadata
        && metadata.TryGetValue("Usage", out object? usageValue)
        && usageValue is ChatTokenUsage usage)
    {
        this._log.LogTrace("Usage report: input tokens: {InputTokenCount}, output tokens: {OutputTokenCount}, output reasoning tokens: {ReasoningTokenCount}",
            usage.InputTokenCount, usage.OutputTokenCount, usage.OutputTokenDetails?.ReasoningTokenCount ?? 0);

        // Fall back to the current time when the creation timestamp is missing, null,
        // or not a DateTimeOffset, instead of failing on a hard cast.
        DateTimeOffset timestamp =
            (metadata.TryGetValue("CreatedAt", out object? createdAtValue) && createdAtValue is DateTimeOffset createdAt)
                ? createdAt
                : DateTimeOffset.UtcNow;

        tokenUsage = new TokenUsage
        {
            Timestamp = timestamp,
            ServiceType = "Azure OpenAI",
            ModelType = Constants.ModelType.TextGeneration,
            ModelName = this._deployment,
            ServiceTokensIn = usage.InputTokenCount,
            ServiceTokensOut = usage.OutputTokenCount,
            ServiceReasoningTokens = usage.OutputTokenDetails?.ReasoningTokenCount
        };
    }

    // NOTE: as stated at https://platform.openai.com/docs/api-reference/chat/streaming#chat/streaming-choices,
    // the choices can be empty for the last chunk when stream_options: { "include_usage": true } is set to get
    // token counts. It is therefore possible that x.Text is null while tokenUsage is not (the usage statistics
    // for the entire request are included in the last chunk).
    // Chunks that carry neither text nor usage data are skipped.
    if (x.Text is null && tokenUsage is null) { continue; }

    // Usage-only chunks are emitted with an empty text so callers still receive the token counts.
    yield return new(x.Text ?? string.Empty, tokenUsage);
}
160189 }
161190}
0 commit comments