@@ -237,7 +237,7 @@ int main(int argc, char ** argv) {
237
237
238
238
if (params.interactive_first || params.instruct || !params.prompt .empty () || session_tokens.empty ()) {
239
239
LOG (" tokenize the prompt\n " );
240
- embd_inp = ::llama_tokenize (ctx, params.prompt , add_bos);
240
+ embd_inp = ::llama_tokenize (ctx, params.prompt , add_bos, true );
241
241
} else {
242
242
LOG (" use session tokens\n " );
243
243
embd_inp = session_tokens;
@@ -259,10 +259,10 @@ int main(int argc, char ** argv) {
259
259
if (ctx_guidance) {
260
260
LOG (" cfg_negative_prompt: \" %s\"\n " , log_tostr (params.cfg_negative_prompt ));
261
261
262
- guidance_inp = ::llama_tokenize (ctx_guidance, params.cfg_negative_prompt , add_bos);
262
+ guidance_inp = ::llama_tokenize (ctx_guidance, params.cfg_negative_prompt , add_bos, true );
263
263
LOG (" guidance_inp tokenized: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx_guidance, guidance_inp));
264
264
265
- std::vector<llama_token> original_inp = ::llama_tokenize (ctx, params.prompt , add_bos);
265
+ std::vector<llama_token> original_inp = ::llama_tokenize (ctx, params.prompt , add_bos, true );
266
266
LOG (" original_inp tokenized: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, original_inp));
267
267
268
268
original_prompt_len = original_inp.size ();
@@ -316,8 +316,8 @@ int main(int argc, char ** argv) {
316
316
}
317
317
318
318
// prefix & suffix for instruct mode
319
- const auto inp_pfx = ::llama_tokenize (ctx, " \n\n ### Instruction:\n\n " , add_bos);
320
- const auto inp_sfx = ::llama_tokenize (ctx, " \n\n ### Response:\n\n " , false );
319
+ const auto inp_pfx = ::llama_tokenize (ctx, " \n\n ### Instruction:\n\n " , add_bos, true );
320
+ const auto inp_sfx = ::llama_tokenize (ctx, " \n\n ### Response:\n\n " , false , true );
321
321
322
322
LOG (" inp_pfx: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, inp_pfx));
323
323
LOG (" inp_sfx: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, inp_sfx));
@@ -715,7 +715,7 @@ int main(int argc, char ** argv) {
715
715
if (params.interactive ) {
716
716
if (!params.antiprompt .empty ()) {
717
717
// tokenize and inject first reverse prompt
718
- const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false );
718
+ const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false , true );
719
719
embd_inp.insert (embd_inp.end (), first_antiprompt.begin (), first_antiprompt.end ());
720
720
is_antiprompt = true ;
721
721
}
@@ -780,7 +780,7 @@ int main(int argc, char ** argv) {
780
780
embd_inp.insert (embd_inp.end (), inp_pfx.begin (), inp_pfx.end ());
781
781
}
782
782
783
- const auto line_inp = ::llama_tokenize (ctx, buffer, false );
783
+ const auto line_inp = ::llama_tokenize (ctx, buffer, false , true );
784
784
LOG (" input tokens: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, line_inp));
785
785
786
786
embd_inp.insert (embd_inp.end (), line_inp.begin (), line_inp.end ());
0 commit comments