microsoft
diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs
Lines changed: 35 additions & 8 deletions
diff --git a/examples/002-dotnet-Serverless/Program.cs b/examples/002-dotnet-Serverless/Program.cs
Lines changed: 34 additions & 8 deletions
diff --git a/examples/104-dotnet-custom-LLM/Program.cs b/examples/104-dotnet-custom-LLM/Program.cs
Lines changed: 1 addition & 1 deletion
diff --git a/extensions/Anthropic/AnthropicTextGeneration.cs b/extensions/Anthropic/AnthropicTextGeneration.cs
Lines changed: 1 addition & 1 deletion
diff --git a/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs b/extensions/AzureOpenAI/AzureOpenAI/AzureOpenAITextGenerator.cs
Lines changed: 33 additions & 4 deletions
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
Lines changed: 2 additions & 3 deletions
diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
Lines changed: 3 additions & 3 deletions
diff --git a/extensions/ONNX/Onnx.FunctionalTests/OnnxTextGeneratorTest.cs b/extensions/ONNX/Onnx.FunctionalTests/OnnxTextGeneratorTest.cs
Lines changed: 1 addition & 1 deletion
diff --git a/extensions/ONNX/Onnx/OnnxTextGenerator.cs b/extensions/ONNX/Onnx/OnnxTextGenerator.cs
Lines changed: 1 addition & 1 deletion
diff --git a/extensions/Ollama/Ollama/OllamaTextGenerator.cs b/extensions/Ollama/Ollama/OllamaTextGenerator.cs
Lines changed: 1 addition & 1 deletion
@@ -253,31 +253,58 @@ private static async Task AskSimpleQuestionStreamingTheAnswer()
     {
         var question = "What's E = m*c^2?";
         Console.WriteLine($"Question: {question}");
-        Console.WriteLine($"Expected result: formula explanation using the information loaded");
+        Console.WriteLine("Expected result: formula explanation using the information loaded");
 
         Console.Write("\nAnswer: ");
+        var tokenUsage = new List<TokenUsage>();
         var answerStream = s_memory.AskStreamingAsync(question, options: new SearchOptions { Stream = true });
 
         await foreach (var answer in answerStream)
         {
             // Print token received by LLM
             Console.Write(answer.Result);
+
+            // Collect token usage
+            if (answer.TokenUsage?.Count > 0)
+            {
+                tokenUsage = tokenUsage.Union(answer.TokenUsage).ToList();
+            }
+
             // Slow down the stream for demo purpose
             await Task.Delay(25);
         }
 
+        Console.WriteLine("\n\nToken usage report:");
+        foreach (var report in tokenUsage)
+        {
+            Console.WriteLine($"{report.ServiceType}: {report.ModelName} [{report.ModelType}]");
+            Console.WriteLine($"- Input : {report.TokenizerTokensIn} tokens (measured by KM tokenizer)");
+            Console.WriteLine($"- Input : {report.ServiceTokensIn} tokens (measured by remote service)");
+            Console.WriteLine($"- Output: {report.ServiceTokensOut} tokens (measured by remote service)");
+            Console.WriteLine($"- Output: {report.TokenizerTokensOut} tokens (measured by KM tokenizer)");
+            Console.WriteLine();
+        }
+
         Console.WriteLine("\n\n====================================\n");
 
         /* OUTPUT
 
         Question: What's E = m*c^2?
-
-        Answer: E = m*c^2 is the formula representing the principle of mass-energy equivalence, which was introduced by Albert Einstein. In this equation,
-        E stands for energy, m represents mass, and c is the speed of light in a vacuum, which is approximately 299,792,458 meters per second (m/s).
-        The equation states that the energy (E) of a system in its rest frame is equal to its mass (m) multiplied by the square of the speed of light (c^2).
-        This implies that mass and energy are interchangeable; a small amount of mass can be converted into a large amount of energy and vice versa,
-        due to the speed of light being a very large number when squared. This concept is a fundamental principle in physics and has important implications
-        in various fields, including nuclear physics and cosmology.
+        Expected result: formula explanation using the information loaded
+
+        Answer: E = m*c^2 is a formula derived by the physicist Albert Einstein, which describes the principle of
+        mass–energy equivalence. In this equation, E represents energy, m represents mass, and c represents the
+        speed of light in a vacuum (approximately 3 x 10^8 meters per second). The formula indicates that mass and
+        energy are interchangeable; they are different forms of the same thing and can be converted into each other.
+        This principle is fundamental in physics and has significant implications in various fields, including nuclear
+        physics and cosmology.
+
+        Token usage report:
+        Azure OpenAI: gpt-4o [TextGeneration]
+        - Input : 15657 tokens (measured by KM tokenizer)
+        - Input : 15664 tokens (measured by remote service)
+        - Output: 110 tokens (measured by remote service)
+        - Output: 110 tokens (measured by KM tokenizer)
 
         */
     }
 
@@ -311,31 +311,57 @@ private static async Task AskSimpleQuestionStreamingTheAnswer()
     {
         var question = "What's E = m*c^2?";
         Console.WriteLine($"Question: {question}");
-        Console.WriteLine($"Expected result: formula explanation using the information loaded");
+        Console.WriteLine("Expected result: formula explanation using the information loaded");
 
         Console.Write("\nAnswer: ");
+        var tokenUsage = new List<TokenUsage>();
         var answerStream = s_memory.AskStreamingAsync(question, options: new SearchOptions { Stream = true });
 
         await foreach (var answer in answerStream)
         {
             // Print token received by LLM
             Console.Write(answer.Result);
+
+            // Collect token usage
+            if (answer.TokenUsage?.Count > 0)
+            {
+                tokenUsage = tokenUsage.Union(answer.TokenUsage).ToList();
+            }
+
             // Slow down the stream for demo purpose
             await Task.Delay(25);
         }
 
+        Console.WriteLine("\n\nToken usage report:");
+        foreach (var report in tokenUsage)
+        {
+            Console.WriteLine($"{report.ServiceType}: {report.ModelName} [{report.ModelType}]");
+            Console.WriteLine($"- Input : {report.TokenizerTokensIn} tokens (measured by KM tokenizer)");
+            Console.WriteLine($"- Input : {report.ServiceTokensIn} tokens (measured by remote service)");
+            Console.WriteLine($"- Output: {report.ServiceTokensOut} tokens (measured by remote service)");
+            Console.WriteLine($"- Output: {report.TokenizerTokensOut} tokens (measured by KM tokenizer)");
+            Console.WriteLine();
+        }
+
         Console.WriteLine("\n\n====================================\n");
 
         /* OUTPUT
 
         Question: What's E = m*c^2?
-
-        Answer: E = m*c^2 is the formula representing the principle of mass-energy equivalence, which was introduced by Albert Einstein. In this equation,
-        E stands for energy, m represents mass, and c is the speed of light in a vacuum, which is approximately 299,792,458 meters per second (m/s).
-        The equation states that the energy (E) of a system in its rest frame is equal to its mass (m) multiplied by the square of the speed of light (c^2).
-        This implies that mass and energy are interchangeable; a small amount of mass can be converted into a large amount of energy and vice versa,
-        due to the speed of light being a very large number when squared. This concept is a fundamental principle in physics and has important implications
-        in various fields, including nuclear physics and cosmology.
+        Expected result: formula explanation using the information loaded
+
+        Answer: E = m*c^2 is a formula derived by physicist Albert Einstein, which expresses the principle of
+        mass–energy equivalence. In this equation, E represents energy, m represents mass, and c represents the
+        speed of light in a vacuum (approximately 3 x 10^8 meters per second). The formula indicates that mass and
+        energy are interchangeable; a small amount of mass can be converted into a large amount of energy, and vice
+        versa, differing only by a multiplicative constant (c^2).
+
+        Token usage report:
+        Azure OpenAI: gpt-4o [TextGeneration]
+        - Input : 24349 tokens (measured by KM tokenizer)
+        - Input : 24356 tokens (measured by remote service)
+        - Output: 103 tokens (measured by remote service)
+        - Output: 103 tokens (measured by KM tokenizer)
 
         */
     }
 
@@ -68,7 +68,7 @@ public IReadOnlyList<string> GetTokens(string text)
     }
 
     /// <inheritdoc />
-    public async IAsyncEnumerable<string> GenerateTextAsync(
+    public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions options,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
 
@@ -97,7 +97,7 @@ public IReadOnlyList<string> GetTokens(string text)
     }
 
     /// <inheritdoc />
-    public async IAsyncEnumerable<string> GenerateTextAsync(
+    public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions options,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
 
@@ -1,5 +1,6 @@
 // Copyright (c) Microsoft. All rights reserved.
 
+using System;
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
 using System.Net.Http;
@@ -12,6 +13,7 @@
 using Microsoft.KernelMemory.Diagnostics;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
+using OpenAI.Chat;
 
 namespace Microsoft.KernelMemory.AI.AzureOpenAI;
 
@@ -28,6 +30,8 @@ public sealed class AzureOpenAITextGenerator : ITextGenerator
     private readonly ITextTokenizer _textTokenizer;
     private readonly ILogger<AzureOpenAITextGenerator> _log;
 
+    private readonly string _deployment;
+
     /// <inheritdoc/>
     public int MaxTokenTotal { get; }
 
@@ -87,6 +91,7 @@ public AzureOpenAITextGenerator(
     {
         this._client = skClient;
         this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<AzureOpenAITextGenerator>();
+        this._deployment = config.Deployment;
         this.MaxTokenTotal = config.MaxTokenTotal;
 
         textTokenizer ??= TokenizerFactory.GetTokenizerForEncoding(config.Tokenizer);
@@ -114,7 +119,7 @@ public IReadOnlyList<string> GetTokens(string text)
     }
 
     /// <inheritdoc/>
-    public async IAsyncEnumerable<string> GenerateTextAsync(
+    public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions options,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
@@ -153,9 +158,33 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
 
         await foreach (StreamingTextContent x in result.WithCancellation(cancellationToken))
         {
-            if (x.Text == null) { continue; }
-
-            yield return x.Text;
+            TokenUsage? tokenUsage = null;
+
+            // The last message includes tokens usage metadata.
+            // https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options
+            if (x.Metadata?["Usage"] is ChatTokenUsage usage)
+            {
+                this._log.LogTrace("Usage report: input tokens: {InputTokenCount}, output tokens: {OutputTokenCount}, output reasoning tokens: {ReasoningTokenCount}",
+                    usage.InputTokenCount, usage.OutputTokenCount, usage.OutputTokenDetails?.ReasoningTokenCount ?? 0);
+
+                tokenUsage = new TokenUsage
+                {
+                    Timestamp = (DateTimeOffset?)x.Metadata["CreatedAt"] ?? DateTimeOffset.UtcNow,
+                    ServiceType = "Azure OpenAI",
+                    ModelType = Constants.ModelType.TextGeneration,
+                    ModelName = this._deployment,
+                    ServiceTokensIn = usage.InputTokenCount,
+                    ServiceTokensOut = usage.OutputTokenCount,
+                    ServiceReasoningTokens = usage.OutputTokenDetails?.ReasoningTokenCount
+                };
+            }
+
+            // NOTE: as stated at https://platform.openai.com/docs/api-reference/chat/streaming#chat/streaming-choices,
+            // the Choice can also be empty for the last chunk if we set stream_options: { "include_usage": true} to get token counts, so it is possible that
+            // x.Text is null, but tokenUsage is not (token usage statistics for the entire request are included in the last chunk).
+            if (x.Text is null && tokenUsage is null) { continue; }
+
+            yield return new(x.Text ?? string.Empty, tokenUsage);
         }
     }
 }
@@ -40,7 +40,7 @@ public void ItCountsTokens()
 
         // Assert
         Console.WriteLine("Phi3 token count: " + tokenCount);
-        Console.WriteLine("GPT4 token count: " + (new CL100KTokenizer()).CountTokens(text));
+        Console.WriteLine("GPT4 token count: " + new CL100KTokenizer().CountTokens(text));
         Console.WriteLine($"Time: {this._timer.ElapsedMilliseconds / 1000} secs");
 
         // Expected result with Phi-3-mini-4k-instruct-q4.gguf, without BoS (https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf)
@@ -90,9 +90,8 @@ public async Task ItGeneratesText()
         this._timer.Restart();
         var tokens = this._target.GenerateTextAsync(prompt, options);
         var result = new StringBuilder();
-        await foreach (string token in tokens)
+        await foreach (var token in tokens)
         {
-            // Console.WriteLine(token);
             result.Append(token);
         }
 
 
@@ -74,7 +74,7 @@ public IReadOnlyList<string> GetTokens(string text)
     }
 
     /// <inheritdoc/>
-    public IAsyncEnumerable<string> GenerateTextAsync(
+    public IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions options,
         CancellationToken cancellationToken = default)
@@ -85,7 +85,7 @@ public IAsyncEnumerable<string> GenerateTextAsync(
             ? options.TokenSelectionBiases.ToDictionary(pair => (LLamaToken)pair.Key, pair => pair.Value)
             : [];
 
-        var samplingPipeline = new DefaultSamplingPipeline()
+        var samplingPipeline = new DefaultSamplingPipeline
         {
             Temperature = (float)options.Temperature,
             TopP = (float)options.NucleusSampling,
@@ -103,7 +103,7 @@ public IAsyncEnumerable<string> GenerateTextAsync(
         };
 
         this._log.LogTrace("Generating text, temperature {0}, max tokens {1}", samplingPipeline.Temperature, settings.MaxTokens);
-        return executor.InferAsync(prompt, settings, cancellationToken);
+        return executor.InferAsync(prompt, settings, cancellationToken).Select(x => new GeneratedTextContent(x));
     }
 
     /// <inheritdoc/>
 
@@ -45,7 +45,7 @@ public async Task ItGeneratesText()
         this._timer.Restart();
         var tokens = this._target.GenerateTextAsync(prompt, options);
         var result = new StringBuilder();
-        await foreach (string token in tokens)
+        await foreach (var token in tokens)
         {
             result.Append(token);
         }
 
@@ -85,7 +85,7 @@ public OnnxTextGenerator(
     }
 
     /// <inheritdoc/>
-    public async IAsyncEnumerable<string> GenerateTextAsync(
+    public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions? options = null,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
 
@@ -91,7 +91,7 @@ public IReadOnlyList<string> GetTokens(string text)
         return this._textTokenizer.GetTokens(text);
     }
 
-    public async IAsyncEnumerable<string> GenerateTextAsync(
+    public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions options,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ public IReadOnlyList<string> GetTokens(string text)`
`68`	`68`	`}`
`69`	`69`
`70`	`70`	`/// <inheritdoc />`
`71`		`- public async IAsyncEnumerable<string> GenerateTextAsync(`
	`71`	`+ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(`
`72`	`72`	`string prompt,`
`73`	`73`	`TextGenerationOptions options,`
`74`	`74`	`[EnumeratorCancellation] CancellationToken cancellationToken = default)`
Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@ public IReadOnlyList<string> GetTokens(string text)`
`97`	`97`	`}`
`98`	`98`
`99`	`99`	`/// <inheritdoc />`
`100`		`- public async IAsyncEnumerable<string> GenerateTextAsync(`
	`100`	`+ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(`
`101`	`101`	`string prompt,`
`102`	`102`	`TextGenerationOptions options,`
`103`	`103`	`[EnumeratorCancellation] CancellationToken cancellationToken = default)`
Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ public async Task ItGeneratesText()`
`45`	`45`	`this._timer.Restart();`
`46`	`46`	`var tokens = this._target.GenerateTextAsync(prompt, options);`
`47`	`47`	`var result = new StringBuilder();`
`48`		`- await foreach (string token in tokens)`
	`48`	`+ await foreach (var token in tokens)`
`49`	`49`	`{`
`50`	`50`	`result.Append(token);`
`51`	`51`	`}`
Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,7 @@ public OnnxTextGenerator(`
`85`	`85`	`}`
`86`	`86`
`87`	`87`	`/// <inheritdoc/>`
`88`		`- public async IAsyncEnumerable<string> GenerateTextAsync(`
	`88`	`+ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(`
`89`	`89`	`string prompt,`
`90`	`90`	`TextGenerationOptions? options = null,`
`91`	`91`	`[EnumeratorCancellation] CancellationToken cancellationToken = default)`
Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@ public IReadOnlyList<string> GetTokens(string text)`
`91`	`91`	`return this._textTokenizer.GetTokens(text);`
`92`	`92`	`}`
`93`	`93`
`94`		`- public async IAsyncEnumerable<string> GenerateTextAsync(`
	`94`	`+ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(`
`95`	`95`	`string prompt,`
`96`	`96`	`TextGenerationOptions options,`
`97`	`97`	`[EnumeratorCancellation] CancellationToken cancellationToken = default)`