namespace LLama
{
-    using llama_token = Int32;
-
    /// <summary>
    /// A llama_context, which holds all the context required to interact with a model
    /// </summary>
@@ -93,7 +91,7 @@ public void SetSeed(uint seed)
        /// <param name="addBos">Whether to add a bos to the text.</param>
        /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
        /// <returns></returns>
-        public llama_token[] Tokenize(string text, bool addBos = true, bool special = false)
+        public LLamaToken[] Tokenize(string text, bool addBos = true, bool special = false)
        {
            return NativeHandle.Tokenize(text, addBos, special, Encoding);
        }
@@ -104,7 +102,7 @@ public llama_token[] Tokenize(string text, bool addBos = true, bool special = fa
        /// <param name="tokens"></param>
        /// <returns></returns>
        [Obsolete("Use a `StreamingTokenDecoder` instead")]
-        public string DeTokenize(IReadOnlyList<llama_token> tokens)
+        public string DeTokenize(IReadOnlyList<LLamaToken> tokens)
        {
            // Do **not** use this method as an example of how to correctly use the StreamingTokenDecoder!
            // It should be kept around for the entire time you are decoding one stream of tokens.
@@ -219,7 +217,7 @@ public void LoadState(State state)
        /// <param name="pipeline">The pipeline to use to process the logits and to select a token</param>
        /// <param name="lastTokens">The tokens recently returned from the model</param>
        /// <returns>The selected token</returns>
-        public llama_token Sample(ISamplingPipeline pipeline, ReadOnlySpan<llama_token> lastTokens)
+        public LLamaToken Sample(ISamplingPipeline pipeline, ReadOnlySpan<LLamaToken> lastTokens)
        {
            return pipeline.Sample(NativeHandle, NativeHandle.GetLogits(), lastTokens);
        }
@@ -240,11 +238,11 @@ public llama_token Sample(ISamplingPipeline pipeline, ReadOnlySpan<llama_token>
        /// <param name="grammar"></param>
        /// <param name="minP"></param>
        /// <returns></returns>
-        public llama_token Sample(LLamaTokenDataArray candidates, ref float? mirostat_mu, float temperature, MirostatType mirostat,
-                                  float mirostatTau, float mirostatEta, int topK, float topP, float tfsZ, float typicalP,
-                                  SafeLLamaGrammarHandle? grammar, float minP)
+        public LLamaToken Sample(LLamaTokenDataArray candidates, ref float? mirostat_mu, float temperature, MirostatType mirostat,
+                                 float mirostatTau, float mirostatEta, int topK, float topP, float tfsZ, float typicalP,
+                                 SafeLLamaGrammarHandle? grammar, float minP)
        {
-            llama_token id;
+            LLamaToken id;

            if (grammar != null)
            {
@@ -301,7 +299,7 @@ public llama_token Sample(LLamaTokenDataArray candidates, ref float? mirostat_mu
        /// <param name="alphaPresence"></param>
        /// <param name="penalizeNL"></param>
        /// <returns></returns>
-        public LLamaTokenDataArray ApplyPenalty(IEnumerable<llama_token> lastTokens, Dictionary<llama_token, float>? logitBias = null,
+        public LLamaTokenDataArray ApplyPenalty(IEnumerable<LLamaToken> lastTokens, Dictionary<LLamaToken, float>? logitBias = null,
            int repeatLastTokensCount = 64, float repeatPenalty = 1.1f, float alphaFrequency = .0f, float alphaPresence = .0f,
            bool penalizeNL = true)
        {
@@ -311,12 +309,12 @@ public LLamaTokenDataArray ApplyPenalty(IEnumerable<llama_token> lastTokens, Dic
            if (logitBias is not null)
            {
                foreach (var (key, value) in logitBias)
-                    logits[key] += value;
+                    logits[(int)key] += value;
            }

            // Save the newline logit value
-            var nl_token = NativeApi.llama_token_nl(NativeHandle.ModelHandle);
-            var nl_logit = logits[nl_token];
+            var nl_token = NativeApi.llama_token_nl(NativeHandle.ModelHandle);
+            var nl_logit = logits[(int)nl_token];

            // Convert logits into token candidates
            var candidates_p = LLamaTokenDataArray.Create(logits);
@@ -353,7 +351,7 @@ public LLamaTokenDataArray ApplyPenalty(IEnumerable<llama_token> lastTokens, Dic
        /// <returns>The updated `pastTokensCount`.</returns>
        /// <exception cref="RuntimeError"></exception>
        [Obsolete("use llama_decode() instead")]
-        public int Eval(llama_token[] tokens, int pastTokensCount)
+        public int Eval(LLamaToken[] tokens, int pastTokensCount)
        {
            return Eval(tokens.AsSpan(), pastTokensCount);
        }
@@ -366,7 +364,7 @@ public int Eval(llama_token[] tokens, int pastTokensCount)
        /// <returns>The updated `pastTokensCount`.</returns>
        /// <exception cref="RuntimeError"></exception>
        [Obsolete("use llama_decode() instead")]
-        public int Eval(List<llama_token> tokens, int pastTokensCount)
+        public int Eval(List<LLamaToken> tokens, int pastTokensCount)
        {
#if NET5_0_OR_GREATER
            var span = CollectionsMarshal.AsSpan(tokens);
@@ -376,15 +374,15 @@ public int Eval(List<llama_token> tokens, int pastTokensCount)
            // the list. Instead rent an array and copy the data into it. This avoids an allocation, but can't
            // avoid the copying.

-            var rented = System.Buffers.ArrayPool<llama_token>.Shared.Rent(tokens.Count);
+            var rented = System.Buffers.ArrayPool<LLamaToken>.Shared.Rent(tokens.Count);
            try
            {
                tokens.CopyTo(rented, 0);
                return Eval(rented.AsSpan(0, tokens.Count), pastTokensCount);
            }
            finally
            {
-                System.Buffers.ArrayPool<llama_token>.Shared.Return(rented);
+                System.Buffers.ArrayPool<LLamaToken>.Shared.Return(rented);
            }
#endif
        }
@@ -397,7 +395,7 @@ public int Eval(List<llama_token> tokens, int pastTokensCount)
        /// <returns>The updated `pastTokensCount`.</returns>
        /// <exception cref="RuntimeError"></exception>
        [Obsolete("use llama_decode() instead")]
-        public int Eval(ReadOnlyMemory<llama_token> tokens, int pastTokensCount)
+        public int Eval(ReadOnlyMemory<LLamaToken> tokens, int pastTokensCount)
        {
            return Eval(tokens.Span, pastTokensCount);
        }
@@ -410,7 +408,7 @@ public int Eval(ReadOnlyMemory<llama_token> tokens, int pastTokensCount)
        /// <returns>The updated `pastTokensCount`.</returns>
        /// <exception cref="RuntimeError"></exception>
        [Obsolete("use llama_decode() instead")]
-        public int Eval(ReadOnlySpan<llama_token> tokens, int pastTokensCount)
+        public int Eval(ReadOnlySpan<LLamaToken> tokens, int pastTokensCount)
        {
            var total = tokens.Length;
            for (var i = 0; i < total; i += (int)Params.BatchSize)