From b430c51f99391db51ea37753509ca222bca25ca2 Mon Sep 17 00:00:00 2001
From: Slaren <2141330+slaren@users.noreply.github.com>
Date: Wed, 15 Mar 2023 22:14:09 +0100
Subject: [PATCH] Add --ignore-eos parameter

---
 main.cpp  | 9 ++++++++-
 utils.cpp | 3 +++
 utils.h   | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index a812d0fa01842..7ba9804176ff2 100644
--- a/main.cpp
+++ b/main.cpp
@@ -27,6 +27,8 @@
 #define ANSI_COLOR_RESET   "\x1b[0m"
 #define ANSI_BOLD          "\x1b[1m"
 
+static const int EOS_TOKEN_ID = 2;
+
 // determine number of model parts based on the dimension
 static const std::map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
@@ -949,6 +951,11 @@ int main(int argc, char ** argv) {
         {
             const int64_t t_start_sample_us = ggml_time_us();
 
+            if (params.ignore_eos) {
+                // set the logit of the eos token to zero to avoid sampling it
+                logits[logits.size() - n_vocab + EOS_TOKEN_ID] = 0;
+            }
+
             id = llama_sample_top_p_top_k(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_k, top_p, temp, rng);
 
             last_n_tokens.erase(last_n_tokens.begin());
@@ -1037,7 +1044,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-        if (embd.back() == 2) {
+        if (embd.back() == EOS_TOKEN_ID) {
             fprintf(stderr, " [end of text]\n");
             break;
         }
diff --git a/utils.cpp b/utils.cpp
index aa3ad1053da02..79eade6676871 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -60,6 +60,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_color = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
             params.antiprompt = argv[++i];
+        } else if (arg == "--ignore-eos") {
+            params.ignore_eos = true;
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, params);
             exit(0);
@@ -95,6 +97,7 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --repeat_last_n N     last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n);
     fprintf(stderr, "  --repeat_penalty N    penalize repeat sequence of tokens (default: %.1f)\n", params.repeat_penalty);
     fprintf(stderr, "  -c N, --ctx_size N    size of the prompt context (default: %d)\n", params.n_ctx);
+    fprintf(stderr, "  --ignore-eos          ignore end of stream token and continue generating\n");
     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", params.temp);
     fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  -m FNAME, --model FNAME\n");
diff --git a/utils.h b/utils.h
index 021120b0513c7..9c962003deda0 100644
--- a/utils.h
+++ b/utils.h
@@ -35,6 +35,7 @@ struct gpt_params {
     bool interactive = false; // interactive mode
     bool interactive_start = false; // reverse prompt immediately
     std::string antiprompt = ""; // string upon seeing which more user input is prompted
+    bool ignore_eos = false; // do not stop generating after eos
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
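
Note on the logit-masking technique used in the main.cpp hunk (an illustrative sketch, not part of the patch): the patch suppresses the end-of-stream token by overwriting its logit before llama_sample_top_p_top_k runs. Zeroing the logit gives EOS an unnormalized softmax weight of e^0 = 1, which lowers its probability but does not strictly rule it out when the remaining logits are negative. A stronger variant, shown below with a hypothetical mask_token helper that does not exist in the llama.cpp sources, pins the logit to -INFINITY so the token's post-softmax probability is exactly zero:

#include <cmath>   // INFINITY
#include <vector>

// Hypothetical helper: make token_id unsamplable. exp(-inf) == 0, so the
// token's probability after softmax is zero for greedy, top-k, and top-p
// sampling alike.
static void mask_token(std::vector<float> & logits, int token_id) {
    logits[token_id] = -INFINITY;
}

int main() {
    // Toy vocab of 3 tokens; index 2 plays the EOS role. Note that all
    // other logits are negative here, so merely zeroing index 2 would
    // actually make EOS the most likely token.
    std::vector<float> logits = { -1.5f, -0.75f, 0.0f };
    mask_token(logits, /*token_id=*/2); // EOS can no longer be chosen
    return 0;
}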