Skip to content

Commit e9c38cf

Browse files
authored
Merge pull request #1068 from martindevans/jan_2025_binary_update
Jan 2025 llama.cpp Update
2 parents 52f744d + 9a32339 commit e9c38cf

30 files changed

+605
-373
lines changed

LLama.Examples/Examples/BatchedExecutorBoolQ.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ public void Sample()
193193

194194
var token = _sampler.Sample(_executor.Context, _conversation.GetSampleIndex());
195195

196-
var tokens = _executor.Context.NativeHandle.ModelHandle.Tokens;
197-
if (tokens.IsEndOfGeneration(token) || tokens.Newline == token)
196+
var vocab = _executor.Context.Vocab;
197+
if (token.IsEndOfGeneration(vocab) || vocab.Newline == token)
198198
{
199199
_sampledToken = default;
200200
_finished = true;

LLama.Examples/Examples/BatchedExecutorLLava.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ await AnsiConsole
7676
await executor.Infer();
7777

7878
var token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());
79-
if (executor.Context.NativeHandle.ModelHandle.Tokens.IsEndOfGeneration(token))
79+
if (token.IsEndOfGeneration(executor.Context.Vocab))
8080
break;
8181

8282
decoder.Add(token);

LLama.Examples/Examples/BatchedExecutorSimple.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public static async Task Run()
2727
using var executor = new BatchedExecutor(model, parameters);
2828

2929
// we'll need this for evaluating if we are at the end of generation
30-
var modelTokens = executor.Context.NativeHandle.ModelHandle.Tokens;
30+
var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;
3131

3232
// Print some info
3333
var name = model.Metadata.GetValueOrDefault("general.name", "unknown model name");
@@ -115,7 +115,7 @@ await AnsiConsole.Live(table).StartAsync(async ctx =>
115115
var token = conversationData.Conversation.Sample(conversationData.Sampler);
116116

117117
// Some special tokens indicate that this sequence has ended. Check if that's what has been chosen by the sampling pipeline.
118-
if (modelTokens.IsEndOfGeneration(token))
118+
if (token.IsEndOfGeneration(vocab))
119119
{
120120
conversationData.MarkComplete();
121121
}

LLama.Examples/Examples/LLama2ChatSession.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public static async Task Run()
3333
session.WithHistoryTransform(new Llama2HistoryTransformer());
3434

3535
session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
36-
[model.Tokens.EndOfTurnToken ?? "User:", "�"],
36+
["User:", "�"],
3737
redundancyLength: 5));
3838

3939
var inferenceParams = new InferenceParams
@@ -44,7 +44,7 @@ public static async Task Run()
4444
},
4545

4646
MaxTokens = -1, // keep generating tokens until the anti prompt is encountered
47-
AntiPrompts = [model.Tokens.EndOfTurnToken ?? "User:"] // model specific end of turn string (or default)
47+
AntiPrompts = ["User:"] // model specific end of turn string (or default)
4848
};
4949

5050
Console.ForegroundColor = ConsoleColor.Yellow;

LLama.Examples/Examples/LLama3ChatSession.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public static async Task Run()
3434

3535
// Add a transformer to eliminate printing the end of turn tokens, llama 3 specifically has an odd LF that gets printed sometimes
3636
session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
37-
[model.Tokens.EndOfTurnToken ?? "User:", "�"],
37+
["User:", "�"],
3838
redundancyLength: 5));
3939

4040
var inferenceParams = new InferenceParams
@@ -45,7 +45,7 @@ public static async Task Run()
4545
},
4646

4747
MaxTokens = -1, // keep generating tokens until the anti prompt is encountered
48-
AntiPrompts = [model.Tokens.EndOfTurnToken ?? "User:"] // model specific end of turn string (or default)
48+
AntiPrompts = ["User:"] // model specific end of turn string (or default)
4949
};
5050

5151
Console.ForegroundColor = ConsoleColor.Yellow;

LLama.Unittest/BasicTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public void Dispose()
2929
[Fact]
3030
public void BasicModelProperties()
3131
{
32-
Assert.Equal(128256, _model.VocabCount);
32+
Assert.Equal(128256, _model.Vocab.Count);
3333
Assert.Equal(131072, _model.ContextSize);
3434
Assert.Equal(2048, _model.EmbeddingSize);
3535
}

LLama.Unittest/LLamaContextTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public void CheckProperties()
3131
{
3232
Assert.Equal(128u, _context.ContextSize);
3333
Assert.Equal(2048, _context.EmbeddingSize);
34-
Assert.Equal(128256, _context.VocabCount);
34+
Assert.Equal(128256, _context.Vocab.Count);
3535
}
3636

3737
[Fact]

LLama.Unittest/LLamaContextWithCustomLoggerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public void CheckProperties()
5757
{
5858
Assert.Equal(128u, _context.ContextSize);
5959
Assert.Equal(2048, _context.EmbeddingSize);
60-
Assert.Equal(128256, _context.VocabCount);
60+
Assert.Equal(128256, _context.Vocab.Count);
6161
}
6262
}
6363
}

LLama.Unittest/SamplingTests.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
using LLama.Common;
22
using LLama.Native;
3-
43
using System.Numerics.Tensors;
5-
using System.Runtime.InteropServices;
64
using System.Text;
75

86
using Xunit.Abstractions;
97

108
namespace LLama.Unittest
119
{
12-
public class SamplingTests : IDisposable
10+
public class SamplingTests
11+
: IDisposable
1312
{
1413
private readonly ITestOutputHelper _testOutputHelper;
1514
private readonly LLamaWeights _model;
@@ -61,7 +60,7 @@ public void Sampling()
6160
var array = LLamaTokenDataArray.Create(logits);
6261
{
6362
using var _ = LLamaTokenDataArrayNative.Create(array, out var cur_p);
64-
var rawLogits = new float[_model.VocabCount];
63+
var rawLogits = new float[_model.Vocab.Count];
6564
for (int j = 0; j < cur_p.Data.Length; j++)
6665
{
6766
rawLogits[(int) cur_p.Data[j].ID] = cur_p.Data[j].Logit;
@@ -119,7 +118,7 @@ public void BatchedSampling()
119118

120119
for (int b = 0; b < batch_count; b++)
121120
{
122-
var logits = all_logits.Slice(b * _model.VocabCount, _model.VocabCount);
121+
var logits = all_logits.Slice(b * _model.Vocab.Count, _model.Vocab.Count);
123122

124123
// Test raw sampling
125124
Assert.Equal(expected, TensorPrimitives.IndexOfMax(logits));
@@ -128,7 +127,7 @@ public void BatchedSampling()
128127
var array = LLamaTokenDataArray.Create(logits);
129128
{
130129
using var _ = LLamaTokenDataArrayNative.Create(array, out var cur_p);
131-
var rawLogits = new float[_model.VocabCount];
130+
var rawLogits = new float[_model.Vocab.Count];
132131
for (int j = 0; j < cur_p.Data.Length; j++)
133132
{
134133
rawLogits[(int) cur_p.Data[j].ID] = cur_p.Data[j].Logit;
@@ -170,7 +169,7 @@ private static SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandle co
170169
penaltyCount: 60, repeat: 1, freq: 0, presence: 0
171170
);
172171

173-
if (logit_bias != null) { chain.AddLogitBias(context.VocabCount, logit_bias); }
172+
if (logit_bias != null) { chain.AddLogitBias(context.Vocab.Count, logit_bias); }
174173

175174
chain.AddTopK(10);
176175
chain.AddTemperature(0.1f);

LLama.Unittest/TemplateTests.cs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public void BasicTemplate()
6464
+ "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
6565
+ "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>";
6666

67+
var eq = expected == templateResult;
6768
Assert.Equal(expected, templateResult);
6869
}
6970

@@ -244,25 +245,6 @@ public void Clear_ResetsTemplateState()
244245
Assert.Equal(expectedTemplate, templateResult);
245246
}
246247

247-
[Fact]
248-
public void EndOTurnToken_ReturnsExpected()
249-
{
250-
Assert.Equal("<|eot_id|>", _model.Tokens.EndOfTurnToken);
251-
}
252-
253-
[Fact]
254-
public void EndOSpeechToken_ReturnsExpected()
255-
{
256-
_output.WriteLine($"EOS: {_model.Tokens.EOS}");
257-
_output.WriteLine($"EOT: {_model.Tokens.EOT}");
258-
_output.WriteLine($"BOS: {_model.Tokens.BOS}");
259-
260-
var eosStr = ConvertTokenToString(_model.Tokens.EOS!.Value);
261-
_output.WriteLine(eosStr ?? "null");
262-
263-
Assert.Equal("<|eot_id|>", _model.Tokens.EndOfSpeechToken);
264-
}
265-
266248
private string? ConvertTokenToString(LLamaToken token)
267249
{
268250
_output.WriteLine($"ConvertTokenToString: {token}");

0 commit comments

Comments (0)