Skip to content

Commit e9c38cf

Browse files
authored
Merge pull request #1068 from martindevans/jan_2025_binary_update
Jan 2025 llama.cpp Update
2 parents 52f744d + 9a32339 commit e9c38cf

30 files changed

+605
-373
lines changed

LLama.Examples/Examples/BatchedExecutorBoolQ.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ public void Sample()
193193

194194
var token = _sampler.Sample(_executor.Context, _conversation.GetSampleIndex());
195195

196-
var tokens = _executor.Context.NativeHandle.ModelHandle.Tokens;
197-
if (tokens.IsEndOfGeneration(token) || tokens.Newline == token)
196+
var vocab = _executor.Context.Vocab;
197+
if (token.IsEndOfGeneration(vocab) || vocab.Newline == token)
198198
{
199199
_sampledToken = default;
200200
_finished = true;

LLama.Examples/Examples/BatchedExecutorLLava.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ await AnsiConsole
7676
await executor.Infer();
7777

7878
var token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());
79-
if (executor.Context.NativeHandle.ModelHandle.Tokens.IsEndOfGeneration(token))
79+
if (token.IsEndOfGeneration(executor.Context.Vocab))
8080
break;
8181

8282
decoder.Add(token);

LLama.Examples/Examples/BatchedExecutorSimple.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public static async Task Run()
2727
using var executor = new BatchedExecutor(model, parameters);
2828

2929
// we'll need this for evaluating if we are at the end of generation
30-
var modelTokens = executor.Context.NativeHandle.ModelHandle.Tokens;
30+
var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;
3131

3232
// Print some info
3333
var name = model.Metadata.GetValueOrDefault("general.name", "unknown model name");
@@ -115,7 +115,7 @@ await AnsiConsole.Live(table).StartAsync(async ctx =>
115115
var token = conversationData.Conversation.Sample(conversationData.Sampler);
116116

117117
// Some special tokens indicate that this sequence has ended. Check if that's what has been chosen by the sampling pipeline.
118-
if (modelTokens.IsEndOfGeneration(token))
118+
if (token.IsEndOfGeneration(vocab))
119119
{
120120
conversationData.MarkComplete();
121121
}

LLama.Examples/Examples/LLama2ChatSession.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public static async Task Run()
3333
session.WithHistoryTransform(new Llama2HistoryTransformer());
3434

3535
session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
36-
[model.Tokens.EndOfTurnToken ?? "User:", "�"],
36+
["User:", "�"],
3737
redundancyLength: 5));
3838

3939
var inferenceParams = new InferenceParams
@@ -44,7 +44,7 @@ public static async Task Run()
4444
},
4545

4646
MaxTokens = -1, // keep generating tokens until the anti prompt is encountered
47-
AntiPrompts = [model.Tokens.EndOfTurnToken ?? "User:"] // model specific end of turn string (or default)
47+
AntiPrompts = ["User:"] // model specific end of turn string (or default)
4848
};
4949

5050
Console.ForegroundColor = ConsoleColor.Yellow;

LLama.Examples/Examples/LLama3ChatSession.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public static async Task Run()
3434

3535
// Add a transformer to eliminate printing the end of turn tokens, llama 3 specifically has an odd LF that gets printed sometimes
3636
session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
37-
[model.Tokens.EndOfTurnToken ?? "User:", "�"],
37+
["User:", "�"],
3838
redundancyLength: 5));
3939

4040
var inferenceParams = new InferenceParams
@@ -45,7 +45,7 @@ public static async Task Run()
4545
},
4646

4747
MaxTokens = -1, // keep generating tokens until the anti prompt is encountered
48-
AntiPrompts = [model.Tokens.EndOfTurnToken ?? "User:"] // model specific end of turn string (or default)
48+
AntiPrompts = ["User:"] // model specific end of turn string (or default)
4949
};
5050

5151
Console.ForegroundColor = ConsoleColor.Yellow;

LLama.Unittest/BasicTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public void Dispose()
2929
[Fact]
3030
public void BasicModelProperties()
3131
{
32-
Assert.Equal(128256, _model.VocabCount);
32+
Assert.Equal(128256, _model.Vocab.Count);
3333
Assert.Equal(131072, _model.ContextSize);
3434
Assert.Equal(2048, _model.EmbeddingSize);
3535
}

LLama.Unittest/LLamaContextTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public void CheckProperties()
3131
{
3232
Assert.Equal(128u, _context.ContextSize);
3333
Assert.Equal(2048, _context.EmbeddingSize);
34-
Assert.Equal(128256, _context.VocabCount);
34+
Assert.Equal(128256, _context.Vocab.Count);
3535
}
3636

3737
[Fact]

LLama.Unittest/LLamaContextWithCustomLoggerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public void CheckProperties()
5757
{
5858
Assert.Equal(128u, _context.ContextSize);
5959
Assert.Equal(2048, _context.EmbeddingSize);
60-
Assert.Equal(128256, _context.VocabCount);
60+
Assert.Equal(128256, _context.Vocab.Count);
6161
}
6262
}
6363
}

LLama.Unittest/SamplingTests.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
using LLama.Common;
22
using LLama.Native;
3-
43
using System.Numerics.Tensors;
5-
using System.Runtime.InteropServices;
64
using System.Text;
75

86
using Xunit.Abstractions;
97

108
namespace LLama.Unittest
119
{
12-
public class SamplingTests : IDisposable
10+
public class SamplingTests
11+
: IDisposable
1312
{
1413
private readonly ITestOutputHelper _testOutputHelper;
1514
private readonly LLamaWeights _model;
@@ -61,7 +60,7 @@ public void Sampling()
6160
var array = LLamaTokenDataArray.Create(logits);
6261
{
6362
using var _ = LLamaTokenDataArrayNative.Create(array, out var cur_p);
64-
var rawLogits = new float[_model.VocabCount];
63+
var rawLogits = new float[_model.Vocab.Count];
6564
for (int j = 0; j < cur_p.Data.Length; j++)
6665
{
6766
rawLogits[(int) cur_p.Data[j].ID] = cur_p.Data[j].Logit;
@@ -119,7 +118,7 @@ public void BatchedSampling()
119118

120119
for (int b = 0; b < batch_count; b++)
121120
{
122-
var logits = all_logits.Slice(b * _model.VocabCount, _model.VocabCount);
121+
var logits = all_logits.Slice(b * _model.Vocab.Count, _model.Vocab.Count);
123122

124123
// Test raw sampling
125124
Assert.Equal(expected, TensorPrimitives.IndexOfMax(logits));
@@ -128,7 +127,7 @@ public void BatchedSampling()
128127
var array = LLamaTokenDataArray.Create(logits);
129128
{
130129
using var _ = LLamaTokenDataArrayNative.Create(array, out var cur_p);
131-
var rawLogits = new float[_model.VocabCount];
130+
var rawLogits = new float[_model.Vocab.Count];
132131
for (int j = 0; j < cur_p.Data.Length; j++)
133132
{
134133
rawLogits[(int) cur_p.Data[j].ID] = cur_p.Data[j].Logit;
@@ -170,7 +169,7 @@ private static SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandle co
170169
penaltyCount: 60, repeat: 1, freq: 0, presence: 0
171170
);
172171

173-
if (logit_bias != null) { chain.AddLogitBias(context.VocabCount, logit_bias); }
172+
if (logit_bias != null) { chain.AddLogitBias(context.Vocab.Count, logit_bias); }
174173

175174
chain.AddTopK(10);
176175
chain.AddTemperature(0.1f);

LLama.Unittest/TemplateTests.cs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public void BasicTemplate()
6464
+ "<|start_header_id|>assistant<|end_header_id|>\n\n333<|eot_id|>"
6565
+ "<|start_header_id|>user<|end_header_id|>\n\nccc<|eot_id|>";
6666

67+
var eq = expected == templateResult;
6768
Assert.Equal(expected, templateResult);
6869
}
6970

@@ -244,25 +245,6 @@ public void Clear_ResetsTemplateState()
244245
Assert.Equal(expectedTemplate, templateResult);
245246
}
246247

247-
[Fact]
248-
public void EndOTurnToken_ReturnsExpected()
249-
{
250-
Assert.Equal("<|eot_id|>", _model.Tokens.EndOfTurnToken);
251-
}
252-
253-
[Fact]
254-
public void EndOSpeechToken_ReturnsExpected()
255-
{
256-
_output.WriteLine($"EOS: {_model.Tokens.EOS}");
257-
_output.WriteLine($"EOT: {_model.Tokens.EOT}");
258-
_output.WriteLine($"BOS: {_model.Tokens.BOS}");
259-
260-
var eosStr = ConvertTokenToString(_model.Tokens.EOS!.Value);
261-
_output.WriteLine(eosStr ?? "null");
262-
263-
Assert.Equal("<|eot_id|>", _model.Tokens.EndOfSpeechToken);
264-
}
265-
266248
private string? ConvertTokenToString(LLamaToken token)
267249
{
268250
_output.WriteLine($"ConvertTokenToString: {token}");

0 commit comments

Comments (0)