From 8af1991e2a0b371391cc3dc7da9b1a48bd8d847e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 15:40:51 +0300
Subject: [PATCH 1/3] main : restore old EOS behavior in interactive mode

---
 examples/main/main.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index a632bea1cf2b9..26ec48ed5e158 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -634,6 +634,11 @@ int main(int argc, char ** argv) {
                 llama_grammar_accept_token(ctx, grammar, id);
             }
 
+            // replace end of text token with newline token when in interactive mode
+            if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
+                id = llama_token_nl();
+            }
+
             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(id);
         }

From e3da126f2a86706d0ad718057e0371b9dab4bc52 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 16:41:27 +0300
Subject: [PATCH 2/3] main : inject reverse prompt after EOS + update examples/chat.sh

---
 examples/chat.sh       | 8 +++++---
 examples/main/main.cpp | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/examples/chat.sh b/examples/chat.sh
index 9a928ef05431a..bd48c83034acf 100755
--- a/examples/chat.sh
+++ b/examples/chat.sh
@@ -11,6 +11,8 @@ cd ..
 #
 # "--keep 48" is based on the contents of prompts/chat-with-bob.txt
 #
-./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \
-    --repeat_penalty 1.0 --color -i \
-    -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \
+    --repeat_penalty 1.0 --color \
+    -i --interactive-first \
+    -r "User:" --in-prefix " " \
+    -f prompts/chat-with-bob.txt
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 26ec48ed5e158..8a87b534b81e7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -634,9 +634,14 @@ int main(int argc, char ** argv) {
                 llama_grammar_accept_token(ctx, grammar, id);
             }
 
-            // replace end of text token with newline token when in interactive mode
+            // replace end of text token with newline token and inject reverse prompt when in interactive mode
             if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
                 id = llama_token_nl();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
             }
 
             last_n_tokens.erase(last_n_tokens.begin());
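
Note: the following is a minimal standalone sketch of the behavior introduced by
patches 1/3 and 2/3 above, included only as illustration. It does not use the
llama.cpp API: the token IDs, the tokenize() helper and the hard-coded flags are
stand-ins.

    // sketch: replace EOS with a newline and queue the first reverse prompt
    #include <cstdio>
    #include <string>
    #include <vector>

    static const int TOKEN_EOS = 2;   // assumed id for the end-of-text token
    static const int TOKEN_NL  = 13;  // assumed id for the "\n" token

    // stand-in tokenizer: one fake token per character
    static std::vector<int> tokenize(const std::string & text) {
        return std::vector<int>(text.begin(), text.end());
    }

    int main() {
        const bool interactive = true;
        const bool instruct    = false;
        const std::vector<std::string> antiprompt = { "User:" };

        std::vector<int> embd_inp;   // queued input tokens
        int id = TOKEN_EOS;          // pretend the sampler just produced EOS

        // patch 1/3: do not surface EOS in interactive mode, emit a newline instead
        if (id == TOKEN_EOS && interactive && !instruct) {
            id = TOKEN_NL;

            // patch 2/3: queue the first reverse prompt so the exchange continues
            // at the user's turn instead of the model talking past the EOS
            if (!antiprompt.empty()) {
                const auto first = tokenize(antiprompt.front());
                embd_inp.insert(embd_inp.end(), first.begin(), first.end());
            }
        }

        printf("next token id: %d, queued input tokens: %zu\n", id, embd_inp.size());
        return 0;
    }

The real patches key the replacement on llama_token_eos()/llama_token_nl() and
additionally skip it when --input-prefix-bos is in effect, as shown in the diffs above.
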
From d3f5fbef6c2825bb49f1f166def3b9fe5195d403 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 21 Aug 2023 19:52:51 +0300
Subject: [PATCH 3/3] main : flush stdout

---
 examples/main/main.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 8a87b534b81e7..5f69042d811be 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -725,8 +725,8 @@ int main(int argc, char ** argv) {
 
                     is_interacting = true;
                     printf("\n");
-                    console::set_display(console::user_input);
                     fflush(stdout);
+                    console::set_display(console::user_input);
                 } else if (params.instruct) {
                     is_interacting = true;
                 }
@@ -735,6 +735,7 @@ int main(int argc, char ** argv) {
         if (n_past > 0 && is_interacting) {
             if (params.instruct) {
                 printf("\n> ");
+                fflush(stdout);
             }
 
             if (params.input_prefix_bos) {
@@ -745,6 +746,7 @@ int main(int argc, char ** argv) {
             if (!params.input_prefix.empty()) {
                 buffer += params.input_prefix;
                 printf("%s", buffer.c_str());
+                fflush(stdout);
             }
 
             std::string line;
@@ -764,6 +766,7 @@ int main(int argc, char ** argv) {
             if (!params.input_suffix.empty()) {
                 buffer += params.input_suffix;
                 printf("%s", params.input_suffix.c_str());
+                fflush(stdout);
             }
 
             // instruct mode: insert instruction prefix
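
Note: a small standalone illustration (not llama.cpp code) of why patch 3/3 adds
fflush(stdout). Prompts such as "\n> " and the input prefix end without a newline,
so depending on the C library and on how stdout is buffered they can sit in the
stdio buffer while the program blocks waiting for user input.

    #include <cstdio>
    #include <cstring>

    int main() {
        char line[256];

        printf("\n> ");   // no trailing newline: "> " can stay in the stdout buffer
        fflush(stdout);   // make the prompt visible before blocking on input

        if (fgets(line, sizeof(line), stdin) == nullptr) {
            return 1;
        }
        line[strcspn(line, "\n")] = '\0';
        printf("read: '%s'\n", line);
        return 0;
    }

Without the explicit flush, whether the prompt appears before the read depends on
the platform and on whether stdout is a terminal, which is why the patch flushes
after each partial-line printf in the interactive path.
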