Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions lib/src/llama.dart
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import 'dart:convert';
import 'dart:ffi';
import 'dart:math';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';
import 'package:llama_cpp_dart/src/sampling_params.dart';
Expand Down Expand Up @@ -225,8 +224,14 @@ class Llama {
// Check if the sampled token is an EOS token.
bool isEOSToken = newTokenId.value == lib.llama_token_eos(model);

// Convert the token ID to its string representation.
final newTokenStr = tokenToPiece(newTokenId.value);
// Prepare the string representation of the sampled token.
String newTokenStr = "";

// Check that the sampled token is not the BOS token.
if (newTokenId.value != lib.llama_token_bos(model)) {
// Convert the token ID to its string representation.
newTokenStr = tokenToPiece(newTokenId.value);
}

// Update the batch and context for the next token generation.
batch.n_tokens = 0;
Expand Down Expand Up @@ -335,13 +340,16 @@ class Llama {
/// It handles the conversion and memory management involved in this process.
/// This is typically used in decoding the output of the model.
String tokenToPiece(int token) {
Pointer<Char> result = malloc.allocate<Char>(32);
int bufferSize = 64;
Pointer<Char> result = malloc.allocate<Char>(bufferSize);
try {
int nTokens = lib.llama_token_to_piece(model, token, result, 32);
int bytesWritten = lib.llama_token_to_piece(model, token, result, bufferSize);

bytesWritten = min(bytesWritten, bufferSize - 1);

final byteBuffer = result.cast<Uint8>().asTypedList(bytesWritten);

final ByteBuffer byteBuffer = result.cast<Uint8>().asTypedList(nTokens).buffer;

return utf8.decode(byteBuffer.asUint8List(), allowMalformed: false);
return utf8.decode(byteBuffer, allowMalformed: true);
} finally {
malloc.free(result);
}
Expand Down
17 changes: 6 additions & 11 deletions lib/src/llama_processor.dart
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class LlamaProcessor {
Llama.libraryPath = args['libraryPath'] as String?;

Llama? llama;
bool flagForStop = false;
Completer stopCompleter = Completer();

isolateReceivePort.listen((message) async {
if (message is Map) {
Expand All @@ -110,23 +110,18 @@ class LlamaProcessor {
case 'prompt':
llama?.setPrompt(message['prompt']);
while (true) {
if (flagForStop) {
flagForStop = false;
break;
}
if (stopCompleter.isCompleted) break;

var (text, done) = llama!.getNext();
if (done) break;
mainSendPort.send(text);
await Future.delayed(Duration.zero);

if (done) stopCompleter.complete();
}
break;
case 'stop':
flagForStop = true;
if (!stopCompleter.isCompleted) stopCompleter.complete();
llama?.clear();
break;
case 'clear':
// llama?.unloadModel();
break;
}
}
});
Expand Down
2 changes: 1 addition & 1 deletion src/llama.cpp
Submodule llama.cpp updated 187 files