@@ -238,7 +238,7 @@ int main(int argc, char ** argv) {
238
238
239
239
if (params.interactive_first || params.instruct || !params.prompt .empty () || session_tokens.empty ()) {
240
240
LOG (" tokenize the prompt\n " );
241
- embd_inp = ::llama_tokenize (ctx, params.prompt , add_bos);
241
+ embd_inp = ::llama_tokenize (ctx, params.prompt , add_bos, true );
242
242
} else {
243
243
LOG (" use session tokens\n " );
244
244
embd_inp = session_tokens;
@@ -260,10 +260,10 @@ int main(int argc, char ** argv) {
260
260
if (ctx_guidance) {
261
261
LOG (" cfg_negative_prompt: \" %s\"\n " , log_tostr (sparams.cfg_negative_prompt ));
262
262
263
- guidance_inp = ::llama_tokenize (ctx_guidance, sparams.cfg_negative_prompt , add_bos);
263
+ guidance_inp = ::llama_tokenize (ctx_guidance, sparams.cfg_negative_prompt , add_bos, true );
264
264
LOG (" guidance_inp tokenized: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx_guidance, guidance_inp));
265
265
266
- std::vector<llama_token> original_inp = ::llama_tokenize (ctx, params.prompt , add_bos);
266
+ std::vector<llama_token> original_inp = ::llama_tokenize (ctx, params.prompt , add_bos, true );
267
267
LOG (" original_inp tokenized: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, original_inp));
268
268
269
269
original_prompt_len = original_inp.size ();
@@ -320,8 +320,8 @@ int main(int argc, char ** argv) {
320
320
}
321
321
322
322
// prefix & suffix for instruct mode
323
- const auto inp_pfx = ::llama_tokenize (ctx, " \n\n ### Instruction:\n\n " , add_bos);
324
- const auto inp_sfx = ::llama_tokenize (ctx, " \n\n ### Response:\n\n " , false );
323
+ const auto inp_pfx = ::llama_tokenize (ctx, " \n\n ### Instruction:\n\n " , add_bos, true );
324
+ const auto inp_sfx = ::llama_tokenize (ctx, " \n\n ### Response:\n\n " , false , true );
325
325
326
326
LOG (" inp_pfx: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, inp_pfx));
327
327
LOG (" inp_sfx: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, inp_sfx));
@@ -383,6 +383,12 @@ int main(int argc, char ** argv) {
383
383
if (!params.antiprompt .empty ()) {
384
384
for (const auto & antiprompt : params.antiprompt ) {
385
385
LOG_TEE (" Reverse prompt: '%s'\n " , antiprompt.c_str ());
386
+ if (params.verbose_prompt ) {
387
+ auto tmp = ::llama_tokenize (ctx, antiprompt, false , true );
388
+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
389
+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
390
+ }
391
+ }
386
392
}
387
393
}
388
394
@@ -392,10 +398,22 @@ int main(int argc, char ** argv) {
392
398
393
399
if (!params.input_prefix .empty ()) {
394
400
LOG_TEE (" Input prefix: '%s'\n " , params.input_prefix .c_str ());
401
+ if (params.verbose_prompt ) {
402
+ auto tmp = ::llama_tokenize (ctx, params.input_prefix , true , true );
403
+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
404
+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
405
+ }
406
+ }
395
407
}
396
408
397
409
if (!params.input_suffix .empty ()) {
398
410
LOG_TEE (" Input suffix: '%s'\n " , params.input_suffix .c_str ());
411
+ if (params.verbose_prompt ) {
412
+ auto tmp = ::llama_tokenize (ctx, params.input_suffix , false , true );
413
+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
414
+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
415
+ }
416
+ }
399
417
}
400
418
}
401
419
LOG_TEE (" sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n " ,
@@ -717,7 +735,7 @@ int main(int argc, char ** argv) {
717
735
if (params.interactive ) {
718
736
if (!params.antiprompt .empty ()) {
719
737
// tokenize and inject first reverse prompt
720
- const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false );
738
+ const auto first_antiprompt = ::llama_tokenize (ctx, params.antiprompt .front (), false , true );
721
739
embd_inp.insert (embd_inp.end (), first_antiprompt.begin (), first_antiprompt.end ());
722
740
is_antiprompt = true ;
723
741
}
@@ -744,8 +762,7 @@ int main(int argc, char ** argv) {
744
762
std::string buffer;
745
763
if (!params.input_prefix .empty ()) {
746
764
LOG (" appending input prefix: '%s'\n " , params.input_prefix .c_str ());
747
- buffer += params.input_prefix ;
748
- printf (" %s" , buffer.c_str ());
765
+ printf (" %s" , params.input_prefix .c_str ());
749
766
}
750
767
751
768
// color user input only
@@ -767,7 +784,6 @@ int main(int argc, char ** argv) {
767
784
// append input suffix if any
768
785
if (!params.input_suffix .empty ()) {
769
786
LOG (" appending input suffix: '%s'\n " , params.input_suffix .c_str ());
770
- buffer += params.input_suffix ;
771
787
printf (" %s" , params.input_suffix .c_str ());
772
788
}
773
789
@@ -782,10 +798,14 @@ int main(int argc, char ** argv) {
782
798
embd_inp.insert (embd_inp.end (), inp_pfx.begin (), inp_pfx.end ());
783
799
}
784
800
785
- const auto line_inp = ::llama_tokenize (ctx, buffer, false );
801
+ const auto line_pfx = ::llama_tokenize (ctx, params.input_prefix , false , true );
802
+ const auto line_inp = ::llama_tokenize (ctx, buffer, false , false );
803
+ const auto line_sfx = ::llama_tokenize (ctx, params.input_suffix , false , true );
786
804
LOG (" input tokens: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, line_inp));
787
805
806
+ embd_inp.insert (embd_inp.end (), line_pfx.begin (), line_pfx.end ());
788
807
embd_inp.insert (embd_inp.end (), line_inp.begin (), line_inp.end ());
808
+ embd_inp.insert (embd_inp.end (), line_sfx.begin (), line_sfx.end ());
789
809
790
810
// instruct mode: insert response suffix
791
811
if (params.instruct ) {
0 commit comments