Skip to content

Commit 197df5f

Browse files
authored
Merge pull request #18 from jxy/limit_tokens
Compute the remaining token budget along the way and exit once the context length is exhausted
2 parents 235a411 + 5be098f commit 197df5f

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

chat.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -919,7 +919,8 @@ int main(int argc, char ** argv) {
919919
" - If you want to submit another line, end your input in '\\'.\n");
920920
}
921921

922-
int remaining_tokens = params.n_predict;
922+
// we may want to slide the input window along with the context, but for now we restrict to the context length
923+
int remaining_tokens = model.hparams.n_ctx - embd_inp.size();
923924
int input_consumed = 0;
924925
bool input_noecho = true;
925926

@@ -935,7 +936,7 @@ int main(int argc, char ** argv) {
935936

936937

937938

938-
while (true) {
939+
while (remaining_tokens > 0) {
939940
// predict
940941
if (embd.size() > 0) {
941942
const int64_t t_start_us = ggml_time_us();
@@ -980,7 +981,7 @@ int main(int argc, char ** argv) {
980981
input_noecho = false;
981982

982983
// decrement remaining sampling budget
983-
// --remaining_tokens;
984+
--remaining_tokens;
984985
} else {
985986
// some user input remains from prompt or interaction, forward it to processing
986987
while (embd_inp.size() > input_consumed) {
@@ -1054,6 +1055,8 @@ int main(int argc, char ** argv) {
10541055
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
10551056
embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
10561057

1058+
remaining_tokens -= prompt_inp.size() + line_inp.size() + response_inp.size();
1059+
10571060
input_noecho = true; // do not echo this again
10581061
}
10591062

0 commit comments

Comments
 (0)