From dd459b604ffb4c829b4f51a34988ac70f8e4209f Mon Sep 17 00:00:00 2001 From: Johnman Date: Sun, 19 Mar 2023 22:59:16 +0100 Subject: [PATCH 1/2] Reset token budget after every user intervention. In interactive mode, every time the model has to respond to user input it has an increasingly reduced token budget, eventually generating only a few words before stopping. The token budget in interactive mode should apply to each batch of tokens generated after a user intervention, not globally across the whole session. --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index c005d17ccd312..0a24887113b67 100644 --- a/main.cpp +++ b/main.cpp @@ -1054,11 +1054,11 @@ int main(int argc, char ** argv) { embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); } - remaining_tokens -= line_inp.size(); + remaining_tokens = params.n_predict - line_inp.size(); input_noecho = true; // do not echo this again + is_interacting = false; } - is_interacting = false; } // end of text token From fbff2684610d9353a1b0e8d9580194474d2400d2 Mon Sep 17 00:00:00 2001 From: Johnman Date: Sun, 19 Mar 2023 23:38:52 +0100 Subject: [PATCH 2/2] Don't let remaining_tokens get larger than the context. --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 0a24887113b67..a1f0f92d6f28d 100644 --- a/main.cpp +++ b/main.cpp @@ -1054,7 +1054,7 @@ int main(int argc, char ** argv) { embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); } - remaining_tokens = params.n_predict - line_inp.size(); + remaining_tokens = std::min(params.n_predict, model.hparams.n_ctx - (int)embd_inp.size()); input_noecho = true; // do not echo this again is_interacting = false; @@ -1073,7 +1073,7 @@ int main(int argc, char ** argv) { // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. 
if (params.interactive && remaining_tokens <= 0) { - remaining_tokens = params.n_predict; + remaining_tokens = std::min(params.n_predict, model.hparams.n_ctx - (int)embd_inp.size()); // Will get overridden when we get to user input, but we need it to not be 0 when this iteration ends. is_interacting = true; } }