Commit c39547a

Author: Mug
Detect multi-byte responses and wait

1 parent 5f81400 commit c39547a

2 files changed: +14 −2 lines changed

examples/low_level_api/low_level_api_chat_cpp.py
Lines changed: 1 addition & 1 deletion

@@ -96,7 +96,7 @@ def __init__(self, params: GptParams) -> None:
 
         print(file=sys.stderr)
         print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \
-| {llama_cpp.llama_print_system_info().decode('utf8', errors='ignore')}", file=sys.stderr)
+| {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr)
 
         # determine the required inference memory per token:
         if (self.params.mem_test):
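
The only change here is dropping errors='ignore' from the decode call. For reference, this is the trade-off that flag makes in plain Python (standard library behavior, not repo code): a strict decode raises on a truncated multi-byte sequence, while errors='ignore' silently discards the offending bytes.

    # Plain-Python illustration, not part of this repo.
    data = "é".encode("utf-8")[:1]                 # b'\xc3', first byte of a 2-byte char

    print(data.decode("utf-8", errors="ignore"))   # '' - the byte is silently dropped
    try:
        data.decode("utf-8")                       # strict decode refuses
    except UnicodeDecodeError as exc:
        print(exc.reason)                          # 'unexpected end of data'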

llama_cpp/llama.py
Lines changed: 13 additions & 1 deletion

@@ -159,7 +159,7 @@ def __init__(
         )
 
         if self.verbose:
-            print(llama_cpp.llama_print_system_info().decode("utf-8", errors="ignore"), file=sys.stderr)
+            print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
 
     def tokenize(self, text: bytes) -> List[llama_cpp.llama_token]:
         """Tokenize a string.
@@ -446,6 +446,7 @@ def _create_completion(
             self.load_state(self.cache[prompt_tokens])
 
         finish_reason = "length"
+        multibyte_fix = 0
         for token in self.generate(
             prompt_tokens,
             top_k=top_k,
@@ -458,6 +459,12 @@ def _create_completion(
                 finish_reason = "stop"
                 break
 
+            # Contains multi-byte UTF8
+            for num,pattern in [(2, 192), (3, 224), (4, 240)]:
+                # Bitwise AND check
+                if (pattern & token == pattern):
+                    multibyte_fix = num
+
             if self.cache and len(completion_tokens) == 0:
                 if prompt_tokens not in self.cache:
                     if self.verbose:
@@ -466,6 +473,11 @@ def _create_completion(
 
             completion_tokens.append(token)
 
+            # Stop incomplete bytes from passing
+            if (multibyte_fix > 0):
+                multibyte_fix -= 1
+                continue
+
             all_text = self.detokenize(completion_tokens)
             any_stop = [s for s in stop_sequences if s in all_text]
             if len(any_stop) > 0:
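
The masks in the new hunk (192, 224, 240, i.e. 0xC0, 0xE0, 0xF0) correspond to the UTF-8 lead-byte prefixes that announce 2-, 3- and 4-byte characters, and multibyte_fix counts how many follow-up tokens to wait for before the detokenized text is inspected again. Below is a minimal standalone sketch of the same detect-and-wait idea, written against a plain byte stream rather than llama.cpp token ids; expected_utf8_length() is an illustrative helper and not part of the library.

    # Sketch only: buffer bytes until a full UTF-8 character has arrived, then decode.
    def expected_utf8_length(lead: int) -> int:
        """Total bytes in the sequence started by `lead` (1 for ASCII)."""
        if lead & 0xF0 == 0xF0:   # 0b11110xxx -> 4-byte character (mask 240)
            return 4
        if lead & 0xE0 == 0xE0:   # 0b1110xxxx -> 3-byte character (mask 224)
            return 3
        if lead & 0xC0 == 0xC0:   # 0b110xxxxx -> 2-byte character (mask 192)
            return 2
        return 1

    buffer = bytearray()
    pending = 0                           # continuation bytes still owed, like multibyte_fix
    for b in "héllo".encode("utf-8"):     # stand-in for one byte arriving per generated token
        buffer.append(b)
        if pending:
            pending -= 1                  # a continuation byte arrived
        else:
            pending = expected_utf8_length(b) - 1
        if pending == 0:                  # character complete: now it is safe to decode
            print(bytes(buffer).decode("utf-8"), end="")
            buffer.clear()
    print()

Checking the widest mask first gives the same count as the diff's ascending loop, where the last matching pattern wins.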
