From dabd89d88d9150645fc5c284e1985702620c1a5b Mon Sep 17 00:00:00 2001 From: River Zhou Date: Sun, 9 Apr 2023 23:30:43 +0800 Subject: [PATCH] Bugfix: Fix broken: UnicodeDecodeError: 'utf-8' codec can't decode --- llama_cpp/llama.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index bd8f49f5e..3ad2beb4b 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -360,6 +360,16 @@ def _create_completion( break text = all_text[: len(all_text) - longest] returned_characters += len(text[start:]) + _text = '' + try: + _text = text[start:].decode("utf-8") + except UnicodeDecodeError: + for i in range(1,4): + try: + _text = text[start:-i].decode("utf-8") + break + except UnicodeDecodeError: + continue yield { "id": completion_id, "object": "text_completion", @@ -367,7 +377,7 @@ def _create_completion( "model": self.model_path, "choices": [ { - "text": text[start:].decode("utf-8"), + "text": _text, "index": 0, "logprobs": None, "finish_reason": None,