From 8af1991e2a0b371391cc3dc7da9b1a48bd8d847e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 15:40:51 +0300
Subject: [PATCH 1/3] main : restore old EOS behavior in interactive mode

---
 examples/main/main.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index a632bea1cf2b9..26ec48ed5e158 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -634,6 +634,11 @@ int main(int argc, char ** argv) {
                 llama_grammar_accept_token(ctx, grammar, id);
             }
 
+            // replace end of text token with newline token when in interactive mode
+            if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
+                id = llama_token_nl();
+            }
+
             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(id);
         }

From e3da126f2a86706d0ad718057e0371b9dab4bc52 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 16:41:27 +0300
Subject: [PATCH 2/3] main : inject reverse prompt after EOS + update examples/chat.sh

---
 examples/chat.sh       | 8 +++++---
 examples/main/main.cpp | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/examples/chat.sh b/examples/chat.sh
index 9a928ef05431a..bd48c83034acf 100755
--- a/examples/chat.sh
+++ b/examples/chat.sh
@@ -11,6 +11,8 @@ cd ..
 #
 # "--keep 48" is based on the contents of prompts/chat-with-bob.txt
 #
-./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \
-    --repeat_penalty 1.0 --color -i \
-    -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \
+    --repeat_penalty 1.0 --color \
+    -i --interactive-first \
+    -r "User:" --in-prefix " " \
+    -f prompts/chat-with-bob.txt
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 26ec48ed5e158..8a87b534b81e7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -634,9 +634,14 @@ int main(int argc, char ** argv) {
                 llama_grammar_accept_token(ctx, grammar, id);
             }
 
-            // replace end of text token with newline token when in interactive mode
+            // replace end of text token with newline token and inject reverse prompt when in interactive mode
             if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
                 id = llama_token_nl();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
             }
 
             last_n_tokens.erase(last_n_tokens.begin());
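
Note: the following is a minimal standalone sketch of the behavior introduced by
patches 1/3 and 2/3 above, included only as illustration. It does not use the
llama.cpp API: the token IDs, the tokenize() helper and the hard-coded flags are
stand-ins.

    // sketch: replace EOS with a newline and queue the first reverse prompt
    #include <cstdio>
    #include <string>
    #include <vector>

    static const int TOKEN_EOS = 2;   // assumed id for the end-of-text token
    static const int TOKEN_NL  = 13;  // assumed id for the "\n" token

    // stand-in tokenizer: one fake token per character
    static std::vector<int> tokenize(const std::string & text) {
        return std::vector<int>(text.begin(), text.end());
    }

    int main() {
        const bool interactive = true;
        const bool instruct    = false;
        const std::vector<std::string> antiprompt = { "User:" };

        std::vector<int> embd_inp;   // queued input tokens
        int id = TOKEN_EOS;          // pretend the sampler just produced EOS

        // patch 1/3: do not surface EOS in interactive mode, emit a newline instead
        if (id == TOKEN_EOS && interactive && !instruct) {
            id = TOKEN_NL;

            // patch 2/3: queue the first reverse prompt so the exchange continues
            // at the user's turn instead of the model talking past the EOS
            if (!antiprompt.empty()) {
                const auto first = tokenize(antiprompt.front());
                embd_inp.insert(embd_inp.end(), first.begin(), first.end());
            }
        }

        printf("next token id: %d, queued input tokens: %zu\n", id, embd_inp.size());
        return 0;
    }

The real patches key the replacement on llama_token_eos()/llama_token_nl() and
additionally skip it when --input-prefix-bos is in effect, as shown in the diffs above.
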
From d3f5fbef6c2825bb49f1f166def3b9fe5195d403 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 19:52:51 +0300
Subject: [PATCH 3/3] main : flush stdout

---
 examples/main/main.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 8a87b534b81e7..5f69042d811be 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -725,8 +725,8 @@ int main(int argc, char ** argv) {
 
                     is_interacting = true;
                     printf("\n");
-                    console::set_display(console::user_input);
                     fflush(stdout);
+                    console::set_display(console::user_input);
                 } else if (params.instruct) {
                     is_interacting = true;
                 }
@@ -735,6 +735,7 @@ int main(int argc, char ** argv) {
         if (n_past > 0 && is_interacting) {
             if (params.instruct) {
                 printf("\n> ");
+                fflush(stdout);
             }
 
             if (params.input_prefix_bos) {
@@ -745,6 +746,7 @@ int main(int argc, char ** argv) {
             if (!params.input_prefix.empty()) {
                 buffer += params.input_prefix;
                 printf("%s", buffer.c_str());
+                fflush(stdout);
             }
 
             std::string line;
@@ -764,6 +766,7 @@ int main(int argc, char ** argv) {
             if (!params.input_suffix.empty()) {
                 buffer += params.input_suffix;
                 printf("%s", params.input_suffix.c_str());
+                fflush(stdout);
             }
 
             // instruct mode: insert instruction prefix
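
Note: a small standalone illustration (not llama.cpp code) of why patch 3/3 adds
fflush(stdout). Prompts such as "\n> " and the input prefix end without a newline,
so depending on the C library and on how stdout is buffered they can sit in the
stdio buffer while the program blocks waiting for user input.

    #include <cstdio>
    #include <cstring>

    int main() {
        char line[256];

        printf("\n> ");   // no trailing newline: "> " can stay in the stdout buffer
        fflush(stdout);   // make the prompt visible before blocking on input

        if (fgets(line, sizeof(line), stdin) == nullptr) {
            return 1;
        }
        line[strcspn(line, "\n")] = '\0';
        printf("read: '%s'\n", line);
        return 0;
    }

Without the explicit flush, whether the prompt appears before the read depends on
the platform and on whether stdout is a terminal, which is why the patch flushes
after each partial-line printf in the interactive path.
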