Commit ad3a6b9

Fix two bugs in kv-cache backtrack loop (mlc-ai#856)

Bug 1: the old code could stop early because output_ids_ was shortened in place during the loop, shrinking the bound the loop counter was compared against.
Bug 2: an off-by-one in the backoff size, because the early break skipped the loop's increment for the final pop.
1 parent 898db76 commit ad3a6b9

File tree

1 file changed (+1, -2 lines)

cpp/llm_chat.cc

Lines changed: 1 addition & 2 deletions
```diff
@@ -1107,10 +1107,9 @@ class LLMChat {
   // back tracking, find the first set of token that is smaller
   // than the length
   size_t backoff = 0;
-  for (; backoff < output_ids_.size(); ++backoff) {
+  for (; (output_ids_.size() > 0) && (output_message_.length() > stop_pos); ++backoff) {
     output_ids_.pop_back();
     output_message_ = tokenizer_->Decode(output_ids_);
-    if (output_message_.length() <= stop_pos) break;
   }
   // resize kv to remove the context
   ft_.fkvcache_array_popn_(kv_cache_, backoff);
```
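The fixed loop can be sketched as a standalone unit. This is a minimal sketch, not the actual class: `Backtrack` and `FakeDecode` are hypothetical stand-ins for the member loop and `tokenizer_->Decode` (here one character per token), but the loop condition and increment mirror the patched code, so `backoff` counts every pop, including the last one.

```cpp
#include <cassert>
#include <string>
#include <vector>

// Stand-in for tokenizer_->Decode: decodes one 'x' per token id.
std::string FakeDecode(const std::vector<int>& ids) {
  return std::string(ids.size(), 'x');
}

// Pop tokens until the decoded message no longer extends past stop_pos,
// returning how many entries must also be popped from the kv-cache.
// The condition tests the live container size and message length each
// iteration (fixing bug 1), and there is no early break, so ++backoff
// runs after every pop (fixing bug 2).
size_t Backtrack(std::vector<int>& output_ids, std::string& output_message,
                 size_t stop_pos) {
  size_t backoff = 0;
  for (; !output_ids.empty() && output_message.length() > stop_pos; ++backoff) {
    output_ids.pop_back();
    output_message = FakeDecode(output_ids);
  }
  return backoff;  // caller would pass this to fkvcache_array_popn_
}
```

With five tokens and `stop_pos = 2`, three pops are needed (message length 5 → 4 → 3 → 2), so `Backtrack` returns 3, the same count the kv-cache must shrink by.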
