
Commit f18cada

YAML result logging + preset script
1 parent 230d46c commit f18cada

8 files changed: +694, -40 lines changed

common/common.cpp

Lines changed: 318 additions & 9 deletions
Large diffs are not rendered by default.
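
The implementations behind the new logging behaviour live in this file. As a rough, illustrative sketch only (not the commit's actual code), the integer-vector dump and the sortable timestamp declared in common.h below could be written along these lines:

#include <cstdio>
#include <ctime>
#include <string>
#include <vector>

// Illustrative sketch only -- the real implementation is in common/common.cpp.
// Emits an int vector as a single-line YAML flow sequence, e.g. "output_tokens: [1, 15043, 3186]".
void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data) {
    fprintf(stream, "%s: [", prop_name);
    for (size_t i = 0; i < data.size(); ++i) {
        fprintf(stream, "%s%d", i > 0 ? ", " : "", data[i]);
    }
    fprintf(stream, "]\n");
}

// Illustrative sketch only: one possible lexicographically sortable timestamp format.
std::string get_sortable_timestamp() {
    const time_t t = time(NULL);
    char buf[64];
    strftime(buf, sizeof(buf), "%Y-%m-%d-%H.%M.%S", localtime(&t));
    return std::string(buf);
}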

common/common.h

Lines changed: 18 additions & 0 deletions
@@ -11,6 +11,12 @@
 #include <unordered_map>
 #include <tuple>
 
+#ifdef _WIN32
+#define DIRECTORY_SEPARATOR '\\'
+#else
+#define DIRECTORY_SEPARATOR '/'
+#endif // _WIN32
+
 //
 // CLI argument parsing
 //
@@ -61,6 +67,7 @@ struct gpt_params {
     std::string input_suffix = ""; // string to suffix user inputs with
     std::string grammar = "";      // optional BNF-like grammar to constrain sampling
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
+    std::string logdir = "";       // directory in which to save YAML log files
 
     std::string lora_adapter = ""; // lora adapter path
     std::string lora_base = "";    // base model path for the lora adapter
@@ -82,6 +89,7 @@ struct gpt_params {
     bool prompt_cache_ro = false;  // open the prompt cache read-only and do not update it
 
     bool embedding = false;        // get only sentence embedding
+    bool escape = false;           // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool interactive_first = false; // wait for user input immediately
     bool multiline_input = false;  // reverse the usage of `\`
     bool simple_io = false;        // improves compatibility with subprocesses and limited consoles
@@ -144,3 +152,13 @@ std::string llama_detokenize_spm(
 std::string llama_detokenize_bpe(
         llama_context * ctx,
         const std::vector<llama_token> & tokens);
+
+bool create_directory_with_parents(const std::string & path);
+void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
+void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
+void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data, bool remove_first);
+std::string get_sortable_timestamp();
+
+void dump_non_result_info_yaml(
+    FILE * stream, const gpt_params & params, const llama_context * lctx,
+    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model);
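
Because the helpers declared above take a plain FILE * stream, they can also be exercised outside of main. A minimal usage sketch, assuming it is compiled and linked against common, with made-up token ids:

#include "common.h"

#include <cstdio>
#include <vector>

int main() {
    // hypothetical token ids, used only to show the call pattern
    const std::vector<int> output_tokens = {1, 15043, 3186};

    // same call shapes that write_logfile() in examples/main/main.cpp uses below
    dump_string_yaml_multiline(stdout, "output", "Hello world\nsecond line", false);
    dump_vector_int_yaml(stdout, "output_tokens", output_tokens);
    fprintf(stdout, "timestamp: %s\n", get_sortable_timestamp().c_str());

    return 0;
}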

examples/main/main.cpp

Lines changed: 76 additions & 2 deletions
@@ -17,6 +17,7 @@
 #include <ctime>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
@@ -36,9 +37,57 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context ** g_ctx;
+static llama_context           ** g_ctx;
+static llama_model             ** g_model;
+static gpt_params               * g_params;
+static std::vector<llama_token> * g_input_tokens;
+static std::ostringstream       * g_output_ss;
+static std::vector<llama_token> * g_output_tokens;
 static bool is_interacting = false;
 
+void write_logfile(
+    const llama_context * ctx, const gpt_params & params, const llama_model * model,
+    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
+
+    if (params.logdir.empty()) {
+        return;
+    }
+
+    const std::string timestamp = get_sortable_timestamp();
+
+    const bool success = create_directory_with_parents(params.logdir);
+    if (!success) {
+        fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
+            __func__, params.logdir.c_str());
+        return;
+    }
+
+    const std::string logfile_path = params.logdir + timestamp + ".yml";
+    FILE * logfile = fopen(logfile_path.c_str(), "w");
+
+    if (logfile == NULL) {
+        fprintf(stderr, "%s: failed to open logfile %s\n", __func__, logfile_path.c_str());
+        return;
+    }
+
+    fprintf(logfile, "binary: main\n");
+    char model_type[128];
+    llama_model_desc(model, model_type, sizeof(model_type));
+    dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_type);
+
+    fprintf(logfile, "\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "# Generation Results #\n");
+    fprintf(logfile, "######################\n");
+    fprintf(logfile, "\n");
+
+    dump_string_yaml_multiline(logfile, "output", output.c_str(), false);
+    dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+
+    llama_dump_timing_info_yaml(logfile, ctx);
+    fclose(logfile);
+}
+
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
 void sigint_handler(int signo) {
     if (signo == SIGINT) {
@@ -48,6 +97,7 @@ void sigint_handler(int signo) {
         console::cleanup();
         printf("\n");
         llama_print_timings(*g_ctx);
+        write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
         _exit(130);
     }
 }
@@ -56,6 +106,7 @@ void sigint_handler(int signo) {
 
 int main(int argc, char ** argv) {
     gpt_params params;
+    g_params = &params;
 
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
@@ -116,6 +167,7 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
     llama_context * ctx_guidance = NULL;
+    g_model = &model;
     g_ctx = &ctx;
 
     // load the model and apply lora adapter, if any
@@ -397,6 +449,10 @@ int main(int argc, char ** argv) {
     int n_session_consumed = 0;
    int n_past_guidance = 0;
 
+    std::vector<int>   input_tokens;  g_input_tokens  = &input_tokens;
+    std::vector<int>   output_tokens; g_output_tokens = &output_tokens;
+    std::ostringstream output_ss;     g_output_ss     = &output_ss;
+
     // the first thing we will do is to output the prompt, so set color accordingly
     console::set_display(console::prompt);
 
@@ -667,7 +723,15 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo) {
             for (auto id : embd) {
-                printf("%s", llama_token_to_piece(ctx, id).c_str());
+                const std::string token_str = llama_token_to_piece(ctx, id);
+                printf("%s", token_str.c_str());
+
+                if (embd.size() > 1) {
+                    input_tokens.push_back(id);
+                } else {
+                    output_tokens.push_back(id);
+                    output_ss << token_str;
+                }
             }
             fflush(stdout);
         }
@@ -761,6 +825,8 @@ int main(int argc, char ** argv) {
                printf("%s", params.input_suffix.c_str());
            }
 
+           const size_t original_size = embd_inp.size();
+
            // instruct mode: insert instruction prefix
            if (params.instruct && !is_antiprompt) {
                n_consumed = embd_inp.size();
@@ -775,6 +841,12 @@ int main(int argc, char ** argv) {
                embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
            }
 
+           for (size_t i = original_size; i < embd_inp.size(); ++i) {
+               const llama_token token = embd_inp[i];
+               output_tokens.push_back(token);
+               output_ss << llama_token_to_piece(ctx, token);
+           }
+
            n_remain -= line_inp.size();
        }
 
@@ -817,6 +889,8 @@ int main(int argc, char ** argv) {
     }
 
     llama_print_timings(ctx);
+    write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
+
     if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
     llama_free_model(model);
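
write_logfile() above warns and returns instead of aborting when create_directory_with_parents() fails; the actual implementation of that helper sits in the unrendered common.cpp diff. A simplified, POSIX-only sketch of the idea (the real version presumably also covers the Windows DIRECTORY_SEPARATOR case):

#include <cerrno>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>

// Simplified, POSIX-only sketch -- not the commit's implementation.
// Creates each path component in turn and treats "already exists" as success.
bool create_directory_with_parents(const std::string & path) {
    std::string partial;
    for (size_t i = 0; i < path.size(); ++i) {
        partial += path[i];
        if (path[i] != '/' && i + 1 != path.size()) {
            continue; // only act at component boundaries and at the end of the path
        }
        if (mkdir(partial.c_str(), 0755) != 0 && errno != EEXIST) {
            return false; // a component could not be created
        }
    }
    return true;
}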
