Skip to content

Low level chat: Added iterative search to prevent instructions from being echoed #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/low_level_api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class GptParams:
instruct: bool = False
ignore_eos: bool = False
perplexity: bool = False
use_mmap: bool = True
use_mlock: bool = False
mem_test: bool = False
verbose_prompt: bool = False
Expand Down Expand Up @@ -110,7 +111,9 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
dest="use_color"
)
parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
parser.add_argument("--no-mmap", action="store_false",help="do not memory-map model (slower load but may reduce pageouts if not using mlock)",dest="use_mmap")
parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test")
parser.add_argument("--verbose-prompt", action="store_true",help="print prompt before generation",dest="verbose_prompt")
parser.add_argument(
"-r",
"--reverse-prompt",
Expand Down
36 changes: 32 additions & 4 deletions examples/low_level_api/low_level_api_chat_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,25 @@
CONSOLE_COLOR_PROMPT = ANSI_COLOR_YELLOW
CONSOLE_COLOR_USER_INPUT = ANSI_BOLD + ANSI_COLOR_GREEN

# Iterative (streaming) search
# Consumes a stream one element at a time and suppresses every occurrence
# of `pattern`, emitting only the elements that are provably not part of a
# match. Elements are withheld while they could still be the start of the
# pattern and released as soon as that possibility is ruled out.
class IterSearch:
    def __init__(self, pattern):
        # Pattern to suppress, stored as a list so it compares element-wise
        # against the buffered stream (works for strings and token lists alike).
        self.pattern = list(pattern)
        # Elements received but not yet emitted: always a (possibly empty)
        # prefix of self.pattern between calls.
        self.buffer = []

    def __call__(self, char):
        """Feed one element; return the list of elements safe to emit.

        Returns [] while the buffered tail could still grow into the pattern,
        and an empty list (after clearing the buffer) when a full match is
        found, so matched elements are never emitted.

        NOTE: elements buffered when the stream ends mid-prefix are never
        flushed — same contract as the non-streaming search this replaces.
        """
        self.buffer.append(char)

        out = []
        # On mismatch, don't discard the whole buffer: shift elements out one
        # at a time and re-test the remaining suffix against the pattern
        # prefix (KMP-style retry). This catches occurrences that overlap a
        # failed partial match, e.g. pattern "aab" inside stream "aaab".
        while self.buffer and self.pattern[:len(self.buffer)] != self.buffer:
            out.append(self.buffer.pop(0))

        # Complete match: swallow it.
        if len(self.buffer) >= len(self.pattern):
            self.buffer.clear()

        return out

# A LLaMA interactive session
class LLaMAInteract:
def __init__(self, params: GptParams) -> None:
Expand Down Expand Up @@ -69,6 +88,7 @@ def __init__(self, params: GptParams) -> None:
self.lparams.seed = self.params.seed
self.lparams.memory_f16 = self.params.memory_f16
self.lparams.use_mlock = self.params.use_mlock
self.lparams.use_mmap = self.params.use_mmap

self.ctx = llama_cpp.llama_init_from_file(self.params.model.encode("utf8"), self.lparams)
if (not self.ctx):
Expand Down Expand Up @@ -114,7 +134,9 @@ def __init__(self, params: GptParams) -> None:
# in instruct mode, we inject a prefix and a suffix to each input by the user
if (self.params.instruct):
self.params.interactive_start = True
self.first_antiprompt.append(self._tokenize(self.params.instruct_inp_prefix.strip(), False))
_ptn = self._tokenize(self.params.instruct_inp_prefix.strip(), False)
self.first_antiprompt.append(_ptn)
self.antiecho = IterSearch(_ptn)

# enable interactive mode if reverse prompt or interactive start is specified
if (len(self.params.antiprompt) != 0 or self.params.interactive_start):
Expand Down Expand Up @@ -217,7 +239,9 @@ def generate(self):
if len(self.embd_inp) <= self.input_consumed:
# out of user input, sample next token

#TODO: self.params.ignore_eos
if (self.params.ignore_eos):
logits = llama_cpp.llama_get_logits(self.ctx)
logits[llama_cpp.llama_token_eos()] = llama_cpp.c_float(0)

_arr = self.last_n_tokens[-min(self.params.repeat_last_n, self.n_past):]
id = llama_cpp.llama_sample_top_p_top_k(
Expand Down Expand Up @@ -263,7 +287,11 @@ def generate(self):
# display tokens
if self.output_echo:
for id in self.embd:
yield id
if self.params.instruct:
for r in self.antiecho(id):
yield r
else:
yield id

# reset color to default if we there is no pending user input
if (self.params.input_echo and len(self.embd_inp) == self.input_consumed):
Expand All @@ -279,7 +307,7 @@ def generate(self):
break

# if we are using instruction mode, and we have processed the initial prompt
if (self.n_past > 0 and self.params.interactive_start):
if (self.params.interactive_start):
break

# end of text token
Expand Down