@@ -284,13 +284,9 @@ int main(int argc, char ** argv) {
284
284
is_interacting = params.interactive_first ;
285
285
}
286
286
287
- bool is_antiprompt = false ;
288
- bool input_echo = true ;
289
-
290
- // HACK - because session saving incurs a non-negligible delay, for now skip re-saving session
291
- // if we loaded a session with at least 75% similarity. It's currently just used to speed up the
292
- // initial prompt so it doesn't need to be an exact match.
293
- bool need_to_save_session = !path_session.empty () && n_matching_session_tokens < (embd_inp.size () * 3 / 4 );
287
+ bool is_antiprompt = false ;
288
+ bool input_echo = true ;
289
+ bool need_to_save_session = !path_session.empty ();
294
290
295
291
296
292
int n_past = 0 ;
@@ -319,6 +315,10 @@ int main(int argc, char ** argv) {
319
315
embd.insert (embd.begin (), last_n_tokens.begin () + n_ctx - n_left/2 - embd.size (), last_n_tokens.end () - embd.size ());
320
316
321
317
// stop saving session if we run out of context
318
+ if (!path_session.empty () && params.session_full ) {
319
+ llama_save_session_file (ctx, path_session.c_str (),
320
+ session_tokens.data (), session_tokens.size ());
321
+ }
322
322
path_session = " " ;
323
323
324
324
// printf("\n---\n");
@@ -619,6 +619,11 @@ int main(int argc, char ** argv) {
619
619
}
620
620
}
621
621
622
+ if (!path_session.empty () && params.session_full ) {
623
+ fprintf (stderr, " \n %s: saving final output to session file '%s'\n " , __func__, path_session.c_str ());
624
+ llama_save_session_file (ctx, path_session.c_str (), session_tokens.data (), session_tokens.size ());
625
+ }
626
+
622
627
llama_print_timings (ctx);
623
628
llama_free (ctx);
624
629
0 commit comments