diff --git a/examples/low_level_api/low_level_api_chat_cpp.py b/examples/low_level_api/low_level_api_chat_cpp.py
index 8773cb1e3..756609e9d 100644
--- a/examples/low_level_api/low_level_api_chat_cpp.py
+++ b/examples/low_level_api/low_level_api_chat_cpp.py
@@ -382,12 +382,15 @@ def generate(self):
             # replace end of text token with newline token when in interactive mode
             if (id == llama_cpp.llama_token_eos() and self.params.interactive and not self.params.instruct):
                 id = self.llama_token_newline[0]
+                self.embd.append(id)
                 if (self.use_antiprompt()):
                     # tokenize and inject first reverse prompt
                     self.embd_inp += self.first_antiprompt[0]
-
-            # add it to the context
-            self.embd.append(id)
+                    for id in self.first_antiprompt[0]:
+                        self.embd.append(id)
+            else:
+                # add it to the context
+                self.embd.append(id)
 
             # echo this to console
             self.output_echo = True