From 1f7945f61f6b1677b7482071cf3a8c6cd1afcab8 Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Thu, 18 Apr 2024 23:49:01 +0200
Subject: [PATCH 01/11] Added llama-3 chat template

---
 llama.cpp                    | 14 ++++++++++++++
 tests/test-chat-template.cpp |  6 +++++-
 2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/llama.cpp b/llama.cpp
index 18e473c095cd5..c2b8d1ecf4bde 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17016,6 +17016,20 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
         }
+    } else if (tmpl == "llama3" || (tmpl.find("<|start_header_id|>") != std::string::npos && tmpl.find("<|end_header_id|>") != std::string::npos)) {
+        // Llama 3
+        bool first_message = true; // add BOS at the beginning
+        for (auto message : chat) {
+            std::string role(message->role);
+            if(first_message){
+                ss << "<|begin_of_text|>";
+                first_message = false;
+            }
+            ss << "<|start_header_id|>" << message->role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
+        }
+        if (add_ass) {
+            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
+        }
     } else {
         // template not supported
         return -1;
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 522cc7d0d9e84..49581130dd63c 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -46,7 +46,9 @@ int main(void) {
         // No template included in tokenizer_config.json, so this template likely needs to be manually set.
         "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
         // CohereForAI/c4ai-command-r-plus
-        "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
+        "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
+        // Llama-3
+        "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -73,6 +75,8 @@ int main(void) {
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
         // CohereForAI/c4ai-command-r-plus
         "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+        // Llama-3
+        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;

From 24874e7323905f718fd5df24a7065d8aaa8eabfe Mon Sep 17 00:00:00 2001
From: Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:27:03 +0200
Subject: [PATCH 02/11] Update llama.cpp: use local role variable in llama-3 template

Co-authored-by: Samuel Tallet <36248671+SamuelTallet@users.noreply.github.com>
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index c2b8d1ecf4bde..7d978edf62df7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17025,7 +17025,7 @@ static int32_t llama_chat_apply_template_internal(
                 ss << "<|begin_of_text|>";
                 first_message = false;
             }
-            ss << "<|start_header_id|>" << message->role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
+            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
         }
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";

From 70eb88c842fb372f45ca0e8ee7dc0df014f6de6c Mon Sep 17 00:00:00 2001
From: Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:27:16 +0200
Subject: [PATCH 03/11] Update llama.cpp: fix spacing of if (first_message) check

Co-authored-by: Samuel Tallet <36248671+SamuelTallet@users.noreply.github.com>
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 7d978edf62df7..0cb34c2aa5ff4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17021,7 +17021,7 @@ static int32_t llama_chat_apply_template_internal(
         bool first_message = true; // add BOS at the beginning
         for (auto message : chat) {
             std::string role(message->role);
-            if(first_message){
+            if (first_message) {
                 ss << "<|begin_of_text|>";
                 first_message = false;
             }

From bf63ff5f29eedcd2350de17bf50c88849095e3d8 Mon Sep 17 00:00:00 2001
From: Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:28:19 +0200
Subject: [PATCH 04/11] Update tests/test-chat-template.cpp: rename comment to "Llama 3"

Co-authored-by: Samuel Tallet <36248671+SamuelTallet@users.noreply.github.com>
---
 tests/test-chat-template.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 49581130dd63c..6df40d913486d 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -75,7 +75,7 @@ int main(void) {
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
         // CohereForAI/c4ai-command-r-plus
         "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-        // Llama-3
+        // Llama 3
         "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
     };
     std::vector<char> formatted_chat(1024);

From 7370d663a3812cb91251fa52c45204c2df122e84 Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Fri, 19 Apr 2024 08:30:36 +0200
Subject: [PATCH 05/11] Added EOS stop sequence according to
 https://github.com/ggerganov/llama.cpp/pull/6751#issuecomment-2065602862

---
 examples/server/utils.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index a8d43ac63bf11..680fb97e18d04 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -385,6 +385,7 @@ static json oaicompat_completion_params_parse(
     // We must add their end sequences to list of stop words
     llama_params["stop"].push_back("<|im_end|>"); // chatml
     llama_params["stop"].push_back("<end_of_turn>"); // gemma
+    llama_params["stop"].push_back("<|eot_id|>"); // llama-3
 
     // Handle "response_format" field
     if (body.contains("response_format")) {

From a55d8a9348fc9e9215229bf03f96ecff4dcc7c91 Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Fri, 19 Apr 2024 09:58:21 +0200
Subject: [PATCH 06/11] Removed adding of BOS token before first message

---
 llama.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 0cb34c2aa5ff4..ad13b2a607924 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17018,13 +17018,8 @@ static int32_t llama_chat_apply_template_internal(
         }
     } else if (tmpl == "llama3" || (tmpl.find("<|start_header_id|>") != std::string::npos && tmpl.find("<|end_header_id|>") != std::string::npos)) {
         // Llama 3
-        bool first_message = true; // add BOS at the beginning
         for (auto message : chat) {
             std::string role(message->role);
-            if (first_message) {
-                ss << "<|begin_of_text|>";
-                first_message = false;
-            }
             ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
         }
         if (add_ass) {

From 373bab1bd70605e59f3ba9093b59313fe8c2ea8e Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Fri, 19 Apr 2024 11:45:38 +0200
Subject: [PATCH 07/11] Removed bos token from expected output from llama-3

---
 tests/test-chat-template.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 6df40d913486d..8b81eecffe398 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -76,7 +76,7 @@ int main(void) {
         // CohereForAI/c4ai-command-r-plus
         "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
         // Llama 3
-        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+        "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;

From 749cdb9c0f3cecbcdf67114745d5e70c9a82f93e Mon Sep 17 00:00:00 2001
From: Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
Date: Fri, 19 Apr 2024 19:15:10 +0200
Subject: [PATCH 08/11] Update tests/test-chat-template.cpp: prepend comma to Llama-3 template entry

Co-authored-by: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
---
 tests/test-chat-template.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 8b81eecffe398..d632310efe291 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -48,7 +48,7 @@ int main(void) {
         // CohereForAI/c4ai-command-r-plus
         "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
         // Llama-3
-        "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
+        ,"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B

From 836c97c094f24a3247e8cb09a8dfc6e5694d3e78 Mon Sep 17 00:00:00 2001
From: Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
Date: Fri, 19 Apr 2024 20:43:45 +0200
Subject: [PATCH 09/11] Update tests/test-chat-template.cpp: use trailing comma on Llama-3 template entry

Co-authored-by: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
---
 tests/test-chat-template.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index d632310efe291..cddf86a4105ea 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -48,7 +48,7 @@ int main(void) {
         // CohereForAI/c4ai-command-r-plus
         "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
         // Llama-3
-        ,"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
+        "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B

From 77a1303e2d2ff7b57e9efbf67b2f440f72584073 Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Fri, 19 Apr 2024 22:10:54 +0200
Subject: [PATCH 10/11] Added <|end_of_text|> as another stop token

---
 examples/server/utils.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 680fb97e18d04..99c0141c85551 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -386,6 +386,7 @@ static json oaicompat_completion_params_parse(
     llama_params["stop"].push_back("<|im_end|>"); // chatml
     llama_params["stop"].push_back("<end_of_turn>"); // gemma
     llama_params["stop"].push_back("<|eot_id|>"); // llama-3
+    llama_params["stop"].push_back("<|end_of_text|>"); // llama-3
 
     // Handle "response_format" field
     if (body.contains("response_format")) {

From a06753581cfc3bd21f21e0298268cf644e02cac7 Mon Sep 17 00:00:00 2001
From: Wouter Tichelaar <tichelaarw@spar.net>
Date: Sat, 20 Apr 2024 01:41:09 +0200
Subject: [PATCH 11/11] Reverted last change of adding the end_of_text stop
 word for llama 3

---
 examples/server/utils.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 99c0141c85551..680fb97e18d04 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -386,7 +386,6 @@ static json oaicompat_completion_params_parse(
     llama_params["stop"].push_back("<|im_end|>"); // chatml
     llama_params["stop"].push_back("<end_of_turn>"); // gemma
     llama_params["stop"].push_back("<|eot_id|>"); // llama-3
-    llama_params["stop"].push_back("<|end_of_text|>"); // llama-3
 
     // Handle "response_format" field
     if (body.contains("response_format")) {