@@ -114,7 +114,16 @@ int main(int argc, char ** argv) {
114
114
}
115
115
116
116
// tokenize the reverse prompt
117
- std::vector<gpt_vocab::id> antiprompt_inp = llama_tokenize_text (ctx, params.antiprompt );
117
+ std::vector<std::vector<gpt_vocab::id>> antipromptv_inp;
118
+
119
+ for (auto antiprompt : params.antiprompt ) {
120
+ antipromptv_inp.push_back (::llama_tokenize (vocab, antiprompt, false ));
121
+ }
122
+
123
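+ // enable interactive mode if reverse prompt is specified (NOTE(review): the `!antipromptv_inp.size ()` test below is true only when NO reverse prompt was given -- condition looks inverted, confirm)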
124
+ if (!antipromptv_inp.size ()) {
125
+ params.interactive = true ;
126
+ }
118
127
119
128
// Setup interactive mode
120
129
if (params.interactive ) {
@@ -182,26 +191,23 @@ int main(int argc, char ** argv) {
182
191
183
192
if (llama_has_unconsumed_input (ctx)) {
184
193
llama_ingest_all_pending_input (ctx, !input_noecho);
185
- // reset color to default if we there is no pending user input
186
- if (!input_noecho && params.use_color ) {
187
- printf (ANSI_COLOR_RESET);
188
- }
189
194
}else {
190
195
// Run inference if we don't have any pending input
191
196
llama_infer (ctx, model_output, is_end_of_text);
192
197
// print the single token output
193
198
printf (" %s" , model_output.c_str ());
194
199
input_noecho = false ;
195
200
}
196
- // reset color to default if we there is no pending user input
197
- if (!input_noecho && params.use_color && ( int )embd_inp. size () == input_consumed ) {
201
+ // reset color to default (all pending input has already been ingested at this point)
202
+ if (!input_noecho && params.use_color ) {
198
203
printf (ANSI_COLOR_RESET);
199
204
}
200
205
201
206
// in interactive mode, and not currently processing queued inputs;
202
207
// check if we should prompt the user for more
203
208
if (params.interactive && !llama_has_unconsumed_input (ctx)) {
204
- // check for reverse prompt
209
+ // check for reverse prompt
210
+ for (auto antiprompt_inp : antipromptv_inp) {
205
211
if (antiprompt_inp.size () && llama_is_anti_prompt_present (ctx, antiprompt_inp)) {
206
212
// reverse prompt found
207
213
is_interacting = true ;
0 commit comments