
Commit d667ada

ngxson and hazelnutcloud authored and committed
add alias for chat template (ggml-org#5858)
1 parent 7c94ffb commit d667ada

2 files changed: +7 −7 lines changed

examples/server/server.cpp

Lines changed: 1 addition & 1 deletion
@@ -413,7 +413,7 @@ struct llama_server_context
         int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
         if (res < 0) {
             LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
-            sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
+            sparams.chat_template = "chatml";
         }
     }
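
For context, a minimal sketch (not taken from this commit) of the probe the server performs around these lines: it formats a one-message test chat with the model's embedded template, and if llama_chat_apply_template reports the template as unsupported (res < 0), it now falls back to the "chatml" alias rather than the raw "<|im_start|>" marker string. The helper name and the empty-string return convention below are assumptions for illustration, not part of server.cpp.

#include "llama.h"
#include <string>
#include <vector>

// Hypothetical helper illustrating the fallback probe.
static std::string pick_chat_template(const llama_model * model) {
    llama_chat_message chat[] = {{"user", "test"}};  // one throwaway message
    std::vector<char> buf(1);                        // probe only; output size does not matter
    int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), (int32_t) buf.size());
    return res < 0 ? "chatml" : "";                  // "" = keep using the model's own template
}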

llama.cpp

Lines changed: 6 additions & 6 deletions
@@ -13282,15 +13282,15 @@ static int32_t llama_chat_apply_template_internal(
     std::string & dest, bool add_ass) {
     // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
     std::stringstream ss;
-    if (tmpl.find("<|im_start|>") != std::string::npos) {
+    if (tmpl == "chatml" || tmpl.find("<|im_start|>") != std::string::npos) {
         // chatml template
         for (auto message : chat) {
             ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
         }
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl.find("[INST]") != std::string::npos) {
+    } else if (tmpl == "llama2" || tmpl.find("[INST]") != std::string::npos) {
         // llama2 template and its variants
         // [variant] support system message
         bool support_system_message = tmpl.find("<<SYS>>") != std::string::npos;
@@ -13325,15 +13325,15 @@ static int32_t llama_chat_apply_template_internal(
             }
         }
         // llama2 templates seem to not care about "add_generation_prompt"
-    } else if (tmpl.find("<|user|>") != std::string::npos) {
+    } else if (tmpl == "zephyr" || tmpl.find("<|user|>") != std::string::npos) {
         // zephyr template
         for (auto message : chat) {
             ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
         }
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
-    } else if (tmpl.find("bos_token + message['role']") != std::string::npos) {
+    } else if (tmpl == "monarch" || tmpl.find("bos_token + message['role']") != std::string::npos) {
         // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
         for (auto message : chat) {
             std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
@@ -13342,7 +13342,7 @@
         if (add_ass) {
             ss << "<s>assistant\n";
         }
-    } else if (tmpl.find("<start_of_turn>") != std::string::npos) {
+    } else if (tmpl == "gemma" || tmpl.find("<start_of_turn>") != std::string::npos) {
         // google/gemma-7b-it
         std::string system_prompt = "";
         for (auto message : chat) {
@@ -13389,7 +13389,7 @@ LLAMA_API int32_t llama_chat_apply_template(
         int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
         if (res < 0) {
             // worst case: there is no information about template, we will use chatml by default
-            curr_tmpl = "<|im_start|>"; // see llama_chat_apply_template_internal
+            curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         } else {
             curr_tmpl = std::string(model_template.data(), model_template.size());
         }
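
With the aliases in place, a caller can pass a short name such as "chatml", "llama2", "zephyr", "monarch", or "gemma" as tmpl instead of a full Jinja template string. Below is a minimal usage sketch, assuming the llama.h API as of this commit (llama_chat_apply_template returns the required output length, which may exceed the buffer, and model may be nullptr when an explicit template string or alias is supplied); the messages and buffer size are illustrative.

#include "llama.h"
#include <cstdio>
#include <vector>

int main() {
    // Illustrative two-message conversation.
    llama_chat_message chat[] = {
        {"system", "You are a helpful assistant."},
        {"user",   "Hello!"},
    };

    std::vector<char> buf(2048);
    // Pass the "chatml" alias directly; no model metadata lookup is needed.
    int32_t res = llama_chat_apply_template(nullptr, "chatml", chat, 2, true, buf.data(), (int32_t) buf.size());
    if (res < 0) {
        fprintf(stderr, "template not supported\n");
        return 1;
    }
    if ((size_t) res > buf.size()) {
        // The return value is the required length; grow the buffer and retry.
        buf.resize(res);
        res = llama_chat_apply_template(nullptr, "chatml", chat, 2, true, buf.data(), (int32_t) buf.size());
    }
    printf("%.*s", res, buf.data());
    return 0;
}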
