Skip to content

Commit 88184ed

Browse files
committed
fix CJK output again
1 parent 66fb034 commit 88184ed

File tree

1 file changed

+61
-24
lines changed

1 file changed

+61
-24
lines changed

llama_cpp/llama.py

Lines changed: 61 additions & 24 deletions
Original file line number · Diff line number · Diff line change
@@ -1003,13 +1003,15 @@ def _create_completion(
10031003
break
10041004

10051005
token_end_position = 0
1006-
for token in remaining_tokens:
1007-
token_end_position += len(self.detokenize([token]))
1008-
# Check if stop sequence is in the token
1009-
if token_end_position >= (remaining_length - first_stop_position):
1010-
break
1011-
logprobs_or_none: Optional[CompletionLogprobs] = None
1012-
if logprobs is not None:
1006+
1007+
if logprobs is not None:
1008+
# not sure how to handle this branch when dealing
1009+
# with CJK output, so keep it unchanged
1010+
for token in remaining_tokens:
1011+
token_end_position += len(self.detokenize([token]))
1012+
# Check if stop sequence is in the token
1013+
if token_end_position > (remaining_length - first_stop_position):
1014+
break
10131015
token_str = self.detokenize([token]).decode(
10141016
"utf-8", errors="ignore"
10151017
)
@@ -1042,23 +1044,58 @@ def _create_completion(
10421044
"token_logprobs": [current_logprobs[int(token)]],
10431045
"top_logprobs": [top_logprob],
10441046
}
1045-
returned_tokens += 1
1046-
yield {
1047-
"id": completion_id,
1048-
"object": "text_completion",
1049-
"created": created,
1050-
"model": model_name,
1051-
"choices": [
1052-
{
1053-
"text": self.detokenize([token]).decode(
1054-
"utf-8", errors="ignore"
1055-
),
1056-
"index": 0,
1057-
"logprobs": logprobs_or_none,
1058-
"finish_reason": None,
1059-
}
1060-
],
1061-
}
1047+
returned_tokens += 1
1048+
yield {
1049+
"id": completion_id,
1050+
"object": "text_completion",
1051+
"created": created,
1052+
"model": model_name,
1053+
"choices": [
1054+
{
1055+
"text": self.detokenize([token]).decode(
1056+
"utf-8", errors="ignore"
1057+
),
1058+
"index": 0,
1059+
"logprobs": logprobs_or_none,
1060+
"finish_reason": None,
1061+
}
1062+
],
1063+
}
1064+
else:
1065+
while len(remaining_tokens) > 0:
1066+
decode_success = False
1067+
for i in range(1, len(remaining_tokens) + 1):
1068+
tokens = remaining_tokens[:i]
1069+
try:
1070+
bs = self.detokenize(tokens)
1071+
text = bs.decode('utf-8')
1072+
decode_success = True
1073+
break
1074+
except UnicodeError:
1075+
pass
1076+
if not decode_success:
1077+
# all remaining tokens cannot be decoded to a UTF-8 character
1078+
break
1079+
token_end_position += len(bs)
1080+
if token_end_position > (remaining_length - first_stop_position):
1081+
break
1082+
remaining_tokens = remaining_tokens[i:]
1083+
returned_tokens += i
1084+
1085+
yield {
1086+
"id": completion_id,
1087+
"object": "text_completion",
1088+
"created": created,
1089+
"model": model_name,
1090+
"choices": [
1091+
{
1092+
"text": text,
1093+
"index": 0,
1094+
"logprobs": None,
1095+
"finish_reason": None,
1096+
}
1097+
],
1098+
}
10621099

10631100
if len(completion_tokens) >= max_tokens:
10641101
text = self.detokenize(completion_tokens)

0 commit comments

Comments (0)