using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

namespace LLama.Examples.Examples;

/// <summary>
/// This demonstrates using a batch to generate two sequences and then using one
/// sequence as the negative guidance ("classifier free guidance") for the other.
/// </summary>
public class BatchedExecutorGuidance
{
    private const int n_len = 32;

    public static async Task Run()
    {
        string modelPath = UserSettings.GetModelPath();

        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        var positivePrompt = AnsiConsole.Ask("Positive Prompt (or ENTER for default):", "My favourite colour is").Trim();
        var negativePrompt = AnsiConsole.Ask("Negative Prompt (or ENTER for default):", "I hate the colour red. My favourite colour is").Trim();

        // Create an executor that can evaluate a batch of conversations together
        var executor = new BatchedExecutor(model, parameters);

        // Print some info
        var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
        Console.WriteLine($"Created executor with model: {name}");

        // Load the two prompts into two conversations
        var guided = executor.Prompt(positivePrompt);
        var guidance = executor.Prompt(negativePrompt);

        // Run inference to evaluate prompts
        await AnsiConsole
            .Status()
            .Spinner(Spinner.Known.Line)
            .StartAsync("Evaluating Prompts...", _ => executor.Infer());

        // Fork the "guided" conversation. We'll run this one without guidance for comparison
        var unguided = guided.Fork();

        // Run inference loop
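        // The "unguided" sampler has no guidance conversation attached, so it samples from the raw logits;
        // the "guided" sampler steers its logits using the negative prompt conversation.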
        var unguidedSampler = new GuidedSampler(null);
        var unguidedDecoder = new StreamingTokenDecoder(executor.Context);
        var guidedSampler = new GuidedSampler(guidance);
        var guidedDecoder = new StreamingTokenDecoder(executor.Context);
        await AnsiConsole
            .Progress()
            .StartAsync(async progress =>
            {
                var reporter = progress.AddTask("Running Inference", maxValue: n_len);

                for (var i = 0; i < n_len; i++)
                {
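                    // The prompts were already evaluated before this loop, so inference is only needed on iterations after the first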
                    if (i != 0)
                        await executor.Infer();

                    // Sample from the "unguided" conversation
                    var u = unguidedSampler.Sample(executor.Context.NativeHandle, unguided.Sample().ToArray(), Array.Empty<LLamaToken>());
                    unguidedDecoder.Add(u);
                    unguided.Prompt(u);

                    // Sample from the "guided" conversation
                    var g = guidedSampler.Sample(executor.Context.NativeHandle, guided.Sample().ToArray(), Array.Empty<LLamaToken>());
                    guidedDecoder.Add(g);

                    // Use this token to advance both guided _and_ guidance, keeping them in sync (except for the initial prompt)
                    guided.Prompt(g);
                    guidance.Prompt(g);

                    // Early exit if we reach the natural end of the guided sentence
                    if (g == model.EndOfSentenceToken)
                        break;

                    reporter.Increment(1);
                }
            });

        AnsiConsole.MarkupLine($"[green]Unguided:[/][white]{unguidedDecoder.Read()}[/]");
        AnsiConsole.MarkupLine($"[green]Guided:[/][white]{guidedDecoder.Read()}[/]");
    }

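    /// <summary>
    /// A sampling pipeline which applies the logits of the guidance conversation (when one is
    /// supplied) to steer sampling, then samples with temperature and top-k.
    /// </summary>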
    private class GuidedSampler(Conversation? guidance)
        : BaseSamplingPipeline
    {
        public override void Accept(SafeLLamaContextHandle ctx, LLamaToken token)
        {
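            // This pipeline keeps no sampling state, so accepted tokens do not need to be recorded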
        }

        public override ISamplingPipeline Clone()
        {
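            // Cloning is not needed for this example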
            throw new NotSupportedException();
        }

        protected override IReadOnlyList<LLamaToken> GetProtectedTokens(SafeLLamaContextHandle ctx)
        {
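            // No tokens are exempted from logit processing by this pipeline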
            return Array.Empty<LLamaToken>();
        }

        protected override void ProcessLogits(SafeLLamaContextHandle ctx, Span<float> logits, ReadOnlySpan<LLamaToken> lastTokens)
        {
            if (guidance != null)
            {
                // Get the logits generated by the guidance sequences
                var guidanceLogits = guidance.Sample();

                // Use those logits to guide this sequence
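                // (the final argument is the guidance strength)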
                NativeApi.llama_sample_apply_guidance(ctx, logits, guidanceLogits, 2);
            }
        }

        protected override LLamaToken ProcessTokenDataArray(SafeLLamaContextHandle ctx, LLamaTokenDataArray candidates, ReadOnlySpan<LLamaToken> lastTokens)
        {
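            // Apply temperature, then top-k, and finally pick a single token from the remaining candidates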
            candidates.Temperature(ctx, 0.8f);
            candidates.TopK(ctx, 25);

            return candidates.SampleToken(ctx);
        }
    }
}