Skip to content

Commit 1bfa591

Browse files
sasha0552 authored and arthw committed
server : fix slot selection by lru (ggml-org#10126)
* server : fix slot selection by lru, migrate lcs to `size_t`
* minor debug log fix
1 parent c9ed90b commit 1bfa591

File tree

2 files changed

+15
-13
lines changed

2 files changed

+15
-13
lines changed

examples/server/server.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,7 @@ struct server_slot {
247247
if (is_processing()) {
248248
SLT_INF(*this, "stop processing: n_past = %d, truncated = %d\n", n_past, truncated);
249249

250+
t_last_used = ggml_time_us();
250251
t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
251252
state = SLOT_STATE_IDLE;
252253
callback_on_release(id);
@@ -730,7 +731,7 @@ struct server_context {
730731

731732
// find the slot that has at least n% prompt similarity
732733
if (ret == nullptr && slot_prompt_similarity != 0.0f) {
733-
int max_lcs_len = 0;
734+
int lcs_len = 0;
734735
float similarity = 0;
735736

736737
for (server_slot & slot : slots) {
@@ -745,20 +746,21 @@ struct server_context {
745746
}
746747

747748
// length of the Longest Common Subsequence between the current slot's prompt and the input prompt
748-
int lcs_len = longest_common_subsequence(slot.cache_tokens, task.prompt_tokens);
749+
int cur_lcs_len = longest_common_subsequence(slot.cache_tokens, task.prompt_tokens);
749750

750751
// fraction of the common subsequence length compared to the current slot's prompt length
751-
similarity = static_cast<float>(lcs_len) / static_cast<int>(slot.cache_tokens.size());
752+
float cur_similarity = static_cast<float>(cur_lcs_len) / static_cast<int>(slot.cache_tokens.size());
752753

753754
// select the current slot if the criteria match
754-
if (lcs_len > max_lcs_len && similarity > slot_prompt_similarity) {
755-
max_lcs_len = lcs_len;
755+
if (cur_lcs_len > lcs_len && cur_similarity > slot_prompt_similarity) {
756+
lcs_len = cur_lcs_len;
757+
similarity = cur_similarity;
756758
ret = &slot;
757759
}
758760
}
759761

760762
if (ret != nullptr) {
761-
SLT_DBG(*ret, "selected slot by lcs similarity, max_lcs_len = %d, similarity = %f\n", max_lcs_len, similarity);
763+
SLT_DBG(*ret, "selected slot by lcs similarity, lcs_len = %d, similarity = %f\n", lcs_len, similarity);
762764
}
763765
}
764766

examples/server/utils.hpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -453,20 +453,20 @@ static size_t longest_common_subsequence(const llama_tokens & a, const llama_tok
453453
}
454454

455455
// get the lengths of the input sequences
456-
int a_len = a.size();
457-
int b_len = b.size();
456+
size_t a_len = a.size();
457+
size_t b_len = b.size();
458458

459459
// initialize the maximum length of the longest common subsequence (LCS)
460-
int max_length = 0;
460+
size_t max_length = 0;
461461

462462
// use two rows instead of a 2D matrix to optimize space
463-
std::vector<int> prev_row(b_len + 1, 0);
464-
std::vector<int> curr_row(b_len + 1, 0);
463+
std::vector<size_t> prev_row(b_len + 1, 0);
464+
std::vector<size_t> curr_row(b_len + 1, 0);
465465

466466
// iterate through the elements of a
467-
for (int i = 1; i <= a_len; i++) {
467+
for (size_t i = 1; i <= a_len; i++) {
468468
// iterate through the elements of b
469-
for (int j = 1; j <= b_len; j++) {
469+
for (size_t j = 1; j <= b_len; j++) {
470470
// if elements at the current positions match
471471
if (a[i - 1] == b[j - 1]) {
472472
// if it's the first element of either sequences, set LCS length to 1

0 commit comments

Comments
 (0)