@@ -17,6 +17,7 @@
 #include <ctime>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
@@ -36,9 +37,57 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context ** g_ctx;
+static llama_context           ** g_ctx;
+static llama_model             ** g_model;
+static gpt_params               * g_params;
+static std::vector<llama_token> * g_input_tokens;
+static std::ostringstream       * g_output_ss;
+static std::vector<llama_token> * g_output_tokens;
 
 static bool is_interacting = false;
 
+void write_logfile(
+    const llama_context * ctx, const gpt_params & params, const llama_model * model,
+    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
+
+    if (params.logdir.empty()) {
+        return;
+    }
+
+    const std::string timestamp = get_sortable_timestamp();
+
+    const bool success = create_directory_with_parents(params.logdir);
+    if (!success) {
+        fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
+                __func__, params.logdir.c_str());
+        return;
+    }
+
+    const std::string logfile_path = params.logdir + timestamp + ".yml";
+    FILE * logfile = fopen(logfile_path.c_str(), "w");
+
+    if (logfile == NULL) {
+        fprintf(stderr, "%s: failed to open logfile %s\n", __func__, logfile_path.c_str());
+        return;
+    }
+
+    fprintf(logfile, "binary: main\n");
+    char model_type[128];
+    llama_model_desc(model, model_type, sizeof(model_type));
+    dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_type);
+
+    fprintf(logfile, "\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "# Generation Results #\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "\n");
+
+    dump_string_yaml_multiline(logfile, "output", output.c_str(), false);
+    dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+
+    llama_dump_timing_info_yaml(logfile, ctx);
+    fclose(logfile);
+}
+
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
 void sigint_handler(int signo) {
     if (signo == SIGINT) {
@@ -48,6 +97,7 @@ void sigint_handler(int signo) {
             console::cleanup();
             printf("\n");
             llama_print_timings(*g_ctx);
+            write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
             _exit(130);
         }
     }
@@ -56,6 +106,7 @@ void sigint_handler(int signo) {
 
 int main(int argc, char ** argv) {
     gpt_params params;
+    g_params = &params;
 
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
@@ -116,6 +167,7 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
     llama_context * ctx_guidance = NULL;
+    g_model = &model;
     g_ctx = &ctx;
 
     // load the model and apply lora adapter, if any
@@ -397,6 +449,10 @@ int main(int argc, char ** argv) {
     int n_session_consumed = 0;
     int n_past_guidance    = 0;
 
+    std::vector<int>   input_tokens;  g_input_tokens  = &input_tokens;
+    std::vector<int>   output_tokens; g_output_tokens = &output_tokens;
+    std::ostringstream output_ss;     g_output_ss     = &output_ss;
+
     // the first thing we will do is to output the prompt, so set color accordingly
     console::set_display(console::prompt);
 
@@ -667,7 +723,15 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo) {
             for (auto id : embd) {
-                printf("%s", llama_token_to_piece(ctx, id).c_str());
+                const std::string token_str = llama_token_to_piece(ctx, id);
+                printf("%s", token_str.c_str());
+
+                if (embd.size() > 1) {
+                    input_tokens.push_back(id);
+                } else {
+                    output_tokens.push_back(id);
+                    output_ss << token_str;
+                }
             }
             fflush(stdout);
         }
@@ -761,6 +825,8 @@ int main(int argc, char ** argv) {
                     printf("%s", params.input_suffix.c_str());
                 }
 
+                const size_t original_size = embd_inp.size();
+
                 // instruct mode: insert instruction prefix
                 if (params.instruct && !is_antiprompt) {
                     n_consumed = embd_inp.size();
@@ -775,6 +841,12 @@ int main(int argc, char ** argv) {
                     embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
                 }
 
+                for (size_t i = original_size; i < embd_inp.size(); ++i) {
+                    const llama_token token = embd_inp[i];
+                    output_tokens.push_back(token);
+                    output_ss << llama_token_to_piece(ctx, token);
+                }
+
                 n_remain -= line_inp.size();
             }
 
@@ -817,6 +889,8 @@ int main(int argc, char ** argv) {
     }
 
     llama_print_timings(ctx);
+    write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
+
    if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
     llama_free_model(model);
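
For readers skimming the diff, the moving parts are: main() registers pointers to its local token and output buffers in the new file-scope globals, the SIGINT handler flushes them through write_logfile() right before _exit(130), and the normal exit path writes the same YAML file after llama_print_timings(). Below is a minimal standalone sketch of that pattern, not part of the commit; all names in it (g_log_text, write_log, sortable_timestamp) are illustrative only.

// Sketch only: main() registers a pointer to its local buffer in a file-scope global
// so a SIGINT handler can flush a timestamped logfile before exiting, mirroring how
// g_output_ss and write_logfile are wired up in the patch above.
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <sstream>
#include <string>

static std::ostringstream * g_log_text = nullptr;   // points at main()'s buffer

static std::string sortable_timestamp() {
    char buf[32];
    const std::time_t now = std::time(nullptr);
    std::strftime(buf, sizeof(buf), "%Y_%m_%d-%H_%M_%S", std::localtime(&now));
    return buf;
}

static void write_log(const std::string & text) {
    const std::string path = sortable_timestamp() + ".yml";
    FILE * f = std::fopen(path.c_str(), "w");
    if (f == NULL) {
        std::fprintf(stderr, "failed to open logfile %s\n", path.c_str());
        return;
    }
    std::fprintf(f, "binary: sketch\n");
    std::fprintf(f, "output: %s\n", text.c_str());
    std::fclose(f);
}

static void sigint_handler(int signo) {
    if (signo == SIGINT && g_log_text != nullptr) {
        write_log(g_log_text->str());   // flush whatever was generated so far
        std::_Exit(130);
    }
}

int main() {
    std::ostringstream log_text;
    g_log_text = &log_text;             // register main()'s local buffer for the handler
    std::signal(SIGINT, sigint_handler);

    log_text << "hello world";          // stands in for accumulated generation output
    write_log(log_text.str());          // normal-exit path writes the same file
    return 0;
}

As in the patch itself, doing file I/O inside a signal handler is not strictly async-signal-safe; it is accepted here as a best-effort flush on Ctrl+C.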