|
1 | 1 | import 'dart:convert';
|
2 | 2 | import 'dart:ffi';
|
3 | 3 | import 'dart:math';
|
4 |
| -import 'dart:typed_data'; |
5 | 4 |
|
6 | 5 | import 'package:ffi/ffi.dart';
|
7 | 6 | import 'package:llama_cpp_dart/src/sampling_params.dart';
|
@@ -225,8 +224,14 @@ class Llama {
|
225 | 224 | // Check if the sampled token is an EOS token.
|
226 | 225 | bool isEOSToken = newTokenId.value == lib.llama_token_eos(model);
|
227 | 226 |
|
228 |
| - // Convert the token ID to its string representation. |
229 |
| - final newTokenStr = tokenToPiece(newTokenId.value); |
| 227 | + // Prepare the string representation of the sampled token. |
| 228 | + String newTokenStr = ""; |
| 229 | + |
| 230 | + // Check that the sampled token is not the BOS token. |
| 231 | + if (newTokenId.value != lib.llama_token_bos(model)) { |
| 232 | + // Convert the token ID to its string representation. |
| 233 | + newTokenStr = tokenToPiece(newTokenId.value); |
| 234 | + } |
230 | 235 |
|
231 | 236 | // Update the batch and context for the next token generation.
|
232 | 237 | batch.n_tokens = 0;
|
@@ -335,13 +340,16 @@ class Llama {
|
335 | 340 | /// It handles the conversion and memory management involved in this process.
|
336 | 341 | /// This is typically used in decoding the output of the model.
|
337 | 342 | String tokenToPiece(int token) {
|
338 |
| - Pointer<Char> result = malloc.allocate<Char>(32); |
| 343 | + int bufferSize = 64; |
| 344 | + Pointer<Char> result = malloc.allocate<Char>(bufferSize); |
339 | 345 | try {
|
340 |
| - int nTokens = lib.llama_token_to_piece(model, token, result, 32); |
| 346 | + int bytesWritten = lib.llama_token_to_piece(model, token, result, bufferSize); |
| 347 | + |
| 348 | + bytesWritten = min(bytesWritten, bufferSize - 1); |
| 349 | + |
| 350 | + final byteBuffer = result.cast<Uint8>().asTypedList(bytesWritten); |
341 | 351 |
|
342 |
| - final ByteBuffer byteBuffer = result.cast<Uint8>().asTypedList(nTokens).buffer; |
343 |
| - |
344 |
| - return utf8.decode(byteBuffer.asUint8List(), allowMalformed: false); |
| 352 | + return utf8.decode(byteBuffer, allowMalformed: true); |
345 | 353 | } finally {
|
346 | 354 | malloc.free(result);
|
347 | 355 | }
|
|
0 commit comments