Skip to content

Commit a8842fd

Browse files
authored
Rename argument
1 parent 36083dc commit a8842fd

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

common/common.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1460,12 +1460,12 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
14601460
params.chat_template = argv[i];
14611461
return true;
14621462
}
1463-
if (arg == "--lcp-similarity") {
1463+
if (arg == "--slot-prompt-similarity" || arg == "-sps") {
14641464
if (++i >= argc) {
14651465
invalid_param = true;
14661466
return true;
14671467
}
1468-
params.lcp_similarity = std::stof(argv[i]);
1468+
params.slot_prompt_similarity = std::stof(argv[i]);
14691469
return true;
14701470
}
14711471
if (arg == "-pps") {
@@ -1839,8 +1839,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
18391839
"set custom jinja chat template (default: template taken from model's metadata)\n"
18401840
"only commonly used templates are accepted:\n"
18411841
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
1842-
options.push_back({ "server", " --lcp-similarity SIMILARITY",
1843-
"how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f)\n", params.lcp_similarity });
1842+
options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY",
1843+
"how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity });
18441844

18451845
#ifndef LOG_DISABLE_LOGS
18461846
options.push_back({ "logging" });

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,7 @@ struct gpt_params {
202202

203203
std::string slot_save_path;
204204

205-
float lcp_similarity = 0.0f;
205+
float slot_prompt_similarity = 0.5f;
206206

207207
// batched-bench params
208208
bool is_pp_shared = false;

examples/server/server.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -647,8 +647,8 @@ struct server_context {
647647

648648
server_metrics metrics;
649649

650-
// Longest Common Prefix similarity for slot selection
651-
float lcp_similarity = 0.0f;
650+
// Necessary similarity of prompt for slot selection
651+
float slot_prompt_similarity = 0.0f;
652652

653653
~server_context() {
654654
if (ctx) {
@@ -812,7 +812,7 @@ struct server_context {
812812
server_slot * ret = nullptr;
813813

814814
// find the slot that has at least n% prompt similarity
815-
if (ret == nullptr && lcp_similarity != 0.0f && !prompt.empty()) {
815+
if (ret == nullptr && slot_prompt_similarity != 0.0f && !prompt.empty()) {
816816
int max_lcp_len = 0;
817817
float similarity = 0;
818818

@@ -840,7 +840,7 @@ struct server_context {
840840
similarity = static_cast<float>(lcp_len) / slot_prompt_len;
841841

842842
// select the current slot if the criteria match
843-
if (lcp_len > max_lcp_len && similarity > lcp_similarity) {
843+
if (lcp_len > max_lcp_len && similarity > slot_prompt_similarity) {
844844
max_lcp_len = lcp_len;
845845
ret = &slot;
846846
}
@@ -2568,8 +2568,8 @@ int main(int argc, char ** argv) {
25682568
log_data["api_key"] = "api_key: " + std::to_string(params.api_keys.size()) + " keys loaded";
25692569
}
25702570

2571-
// Longest Common Prefix similarity for slot selection
2572-
ctx_server.lcp_similarity = params.lcp_similarity;
2571+
// Necessary similarity of prompt for slot selection
2572+
ctx_server.slot_prompt_similarity = params.slot_prompt_similarity;
25732573

25742574
// load the model
25752575
if (!ctx_server.load_model(params)) {

0 commit comments

Comments
 (0)