Commit f325608

common : rework usage print (wip)

1 parent 036813c

File tree: 9 files changed, +106 -145 lines

common/common.cpp

Lines changed: 89 additions & 75 deletions
@@ -289,7 +289,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        // This is temporary, in the future the samplign state will be moved fully to llama_sampling_context.
+        // TODO: this is temporary, in the future the sampling state will be moved fully to llama_sampling_context.
         params.seed = std::stoul(argv[i]);
         sparams.seed = std::stoul(argv[i]);
         return true;
@@ -901,19 +901,15 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.interactive = true;
         return true;
     }
-    if (arg == "--interactive-specials") {
-        params.interactive_specials = true;
-        return true;
-    }
-    if (arg == "--special") {
+    if (arg == "-sp" || arg == "--special") {
         params.special = true;
         return true;
     }
     if (arg == "--embedding") {
         params.embedding = true;
         return true;
     }
-    if (arg == "--interactive-first") {
+    if (arg == "-if" || arg == "--interactive-first") {
         params.interactive_first = true;
         return true;
     }
@@ -965,7 +961,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.flash_attn = true;
         return true;
     }
-    if (arg == "--color") {
+    if (arg == "-co" || arg == "--color") {
        params.use_color = true;
         return true;
     }
@@ -1252,10 +1248,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
         exit(0);
     }
-    if (arg == "--random-prompt") {
-        params.random_prompt = true;
-        return true;
-    }
     if (arg == "--in-prefix-bos") {
         params.input_prefix_bos = true;
         return true;
@@ -1349,6 +1341,16 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     return false;
 }
 
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#endif
+
 void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     const llama_sampling_params & sparams = params.sparams;
 
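A note on the macro added above: `__attribute__((format(printf, N, M)))` makes GCC and Clang type-check printf-style variadic arguments at compile time (MinGW targets need `gnu_printf` so the checker uses GNU/C99 format semantics rather than MSVCRT's). The commit applies it as `LLAMA_COMMON_ATTRIBUTE_FORMAT(4, 5)` to a constructor in the next hunk; the indices are 4/5 rather than 3/4 because the implicit `this` pointer of a non-static member function counts as argument 1. A minimal standalone sketch of the same pattern, using a hypothetical `logger::log_fmt` helper that is not part of this commit:

#include <cstdarg>
#include <cstdio>

#ifdef __GNUC__
#define ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#else
#define ATTRIBUTE_FORMAT(...)
#endif

struct logger {
    // 'this' counts as argument 1, so the format string is argument 2
    // and the variadic arguments start at argument 3
    ATTRIBUTE_FORMAT(2, 3)
    void log_fmt(const char * fmt, ...) {
        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }
};

int main() {
    logger log;
    log.log_fmt("threads: %d\n", 8);      // ok
    // log.log_fmt("threads: %d\n", "8"); // would trigger a -Wformat error at compile time
    return 0;
}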
@@ -1360,52 +1362,83 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     }
     sampler_type_names.pop_back();
 
+    struct option_info {
+        LLAMA_COMMON_ATTRIBUTE_FORMAT(4, 5)
+        option_info(const std::string & tags, const char * args, const char * desc, ...) : tags(tags), args(args), desc(desc) {
+            va_list args_list;
+            va_start(args_list, desc);
+            char buffer[1024];
+            vsnprintf(buffer, sizeof(buffer), desc, args_list);
+            va_end(args_list);
+            this->desc = buffer;
+        }
+
+        std::string tags;
+        std::string args;
+        std::string desc;
+    };
+
+    std::vector<option_info> options;
+
+    // TODO: filter by tags
+
+    options.push_back({ "*", "-h, --help, --usage", "print usage and exit" });
+    options.push_back({ "*", "    --version", "show version and build info" });
+    options.push_back({ "*", "-co, --color", "colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? "true" : "false" });
+    options.push_back({ "*", "-s, --seed SEED", "RNG seed (default: %d, use random seed for < 0)", params.seed });
+    options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.n_threads });
+    options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" });
+    options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" });
+    options.push_back({ "speculative", "-tbd, --threads-batch-draft N", "number of threads to use during batch and prompt processing (default: same as --threads-draft)" });
+    options.push_back({ "*", "-c, --ctx-size N", "size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx });
+    options.push_back({ "*", "-n, --n-predict N", "number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict });
+    options.push_back({ "*", "-b, --batch-size N", "logical maximum batch size (default: %d)", params.n_batch });
+    options.push_back({ "*", "-ub, --ubatch-size N", "physical maximum batch size (default: %d)", params.n_ubatch });
+    options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with (default: empty)" });
+    options.push_back({ "*", "-f, --file FNAME", "a file containing the prompt (default: none)" });
+    options.push_back({ "*", "-bf, --binary-file FNAME", "binary file containing the prompt (default: none)" });
+    options.push_back({ "*", "-e, --escape", "process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)" });
+    options.push_back({ "main", "    --prompt-cache FNAME", "file to cache prompt state for faster startup (default: none)" });
+    options.push_back({ "main", "    --prompt-cache-all", "if specified, saves user input and generations to cache as well\nnot supported with --interactive or other interactive options" });
+    options.push_back({ "main", "    --prompt-cache-ro", "if specified, uses the prompt cache but does not update it" });
+    options.push_back({ "main", "-r, --reverse-prompt PROMPT", "halt generation at PROMPT, return control in interactive mode\ncan be specified more than once for multiple prompts" });
+
+    options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
+    options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)", params.conversation ? "true" : "false" });
+    options.push_back({ "main", "-ins, --instruct", "run in instruction mode (use with Alpaca models) (default: %s)", params.instruct ? "true" : "false" });
+    options.push_back({ "main", "-cml, --chatml", "run in chatml mode (use with ChatML-compatible models) (default: %s)", params.chatml ? "true" : "false" });
+    options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
+    options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
+    options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
+    options.push_back({ "main infill", "    --in-prefix-bos", "prefix BOS to user inputs, preceding the `--in-prefix` string" });
+    options.push_back({ "main infill", "    --in-prefix STRING", "string to prefix user inputs with (default: empty)" });
+    options.push_back({ "main infill", "    --in-suffix STRING", "string to suffix after user inputs with (default: empty)" });
+
     printf("\n");
     printf("usage: %s [options]\n", argv[0]);
     printf("\n");
-    printf("options:\n");
-    printf("  -h, --help, --usage   print usage and exit\n");
-    printf("      --version         show version and build info\n");
-    printf("  -i, --interactive     run in interactive mode\n");
-    printf("      --special         special tokens output enabled\n");
-    printf("      --interactive-specials  allow special tokens in user text, in interactive mode\n");
-    printf("      --interactive-first  run in interactive mode and wait for input right away\n");
-    printf("  -cnv, --conversation  run in conversation mode (does not print special tokens and suffix/prefix)\n");
-    printf("  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
-    printf("  -cml, --chatml        run in chatml mode (use with ChatML-compatible models)\n");
-    printf("      --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
-    printf("  -r PROMPT, --reverse-prompt PROMPT\n");
-    printf("                        halt generation at PROMPT, return control in interactive mode\n");
-    printf("                        (can be specified more than once for multiple prompts).\n");
-    printf("      --color           colorise output to distinguish prompt and user input from generations\n");
-    printf("  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
-    printf("  -t N, --threads N     number of threads to use during generation (default: %d)\n", params.n_threads);
-    printf("  -tb N, --threads-batch N\n");
-    printf("                        number of threads to use during batch and prompt processing (default: same as --threads)\n");
-    printf("  -td N, --threads-draft N");
-    printf("                        number of threads to use during generation (default: same as --threads)\n");
-    printf("  -tbd N, --threads-batch-draft N\n");
-    printf("                        number of threads to use during batch and prompt processing (default: same as --threads-draft)\n");
-    printf("  -p PROMPT, --prompt PROMPT\n");
-    printf("                        prompt to start generation with (default: empty)\n");
-    printf("  -e, --escape          process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
-    printf("      --prompt-cache FNAME  file to cache prompt state for faster startup (default: none)\n");
-    printf("      --prompt-cache-all  if specified, saves user input and generations to cache as well.\n");
-    printf("                        not supported with --interactive or other interactive options\n");
-    printf("      --prompt-cache-ro  if specified, uses the prompt cache but does not update it.\n");
-    printf("      --random-prompt   start with a randomized prompt.\n");
-    printf("      --in-prefix-bos   prefix BOS to user inputs, preceding the `--in-prefix` string\n");
-    printf("      --in-prefix STRING  string to prefix user inputs with (default: empty)\n");
-    printf("      --in-suffix STRING  string to suffix after user inputs with (default: empty)\n");
-    printf("  -f FNAME, --file FNAME\n");
-    printf("                        prompt file to start generation.\n");
-    printf("  -bf FNAME, --binary-file FNAME\n");
-    printf("                        binary file containing multiple choice tasks.\n");
-    printf("  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
-    printf("  -c N, --ctx-size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
-    printf("  -b N, --batch-size N  logical maximum batch size (default: %d)\n", params.n_batch);
-    printf("  -ub N, --ubatch-size N\n");
-    printf("                        physical maximum batch size (default: %d)\n", params.n_ubatch);
+    printf("options:\n\n");
+
+    for (const auto & o : options) {
+        printf("  %-32s", o.args.c_str());
+        if (o.args.length() > 34) {
+            printf("\n%34s", "");
+        }
+
+        //printf("%s\n", o.desc.c_str());
+        // print line by line and pad with spaces
+        const auto desc = o.desc;
+        size_t start = 0;
+        size_t end = desc.find('\n');
+        while (end != std::string::npos) {
+            printf("%s\n%34s", desc.substr(start, end - start).c_str(), "");
+            start = end + 1;
+            end = desc.find('\n', start);
+        }
+
+        printf("%s\n", desc.substr(start).c_str());
+    }
+
     printf("  --samplers            samplers that will be used for generation in the order, separated by \';\'\n");
     printf("                        (default: %s)\n", sampler_type_names.c_str());
     printf("  --sampling-seq        simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
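For reference, here is the new printing logic in isolation: a compilable sketch with a made-up option table (the entries are illustrative, not the real ones), showing how `%-32s` builds the left column, how an argument spec wider than the column pushes the description onto a fresh padded line, and how an embedded `\n` in a description becomes an aligned continuation line:

#include <cstdio>
#include <string>
#include <vector>

struct opt {
    std::string args;
    std::string desc;
};

int main() {
    // made-up entries, just to exercise the short, multi-line, and overlong cases
    std::vector<opt> options = {
        { "-co, --color",                "colorise output (default: false)" },
        { "-r, --reverse-prompt PROMPT", "halt generation at PROMPT\ncan be specified more than once" },
        { "-tbd, --threads-batch-draft N (long spec)", "number of threads for draft batch processing" },
    };

    printf("options:\n\n");

    for (const auto & o : options) {
        // left column: 2-space indent plus the argument spec padded to 32 chars
        printf("  %-32s", o.args.c_str());
        if (o.args.length() > 34) {
            // spec overflows the column: start the description on its own padded line
            printf("\n%34s", "");
        }

        // right column: print the description line by line,
        // padding each continuation line to the same column
        size_t start = 0;
        size_t end   = o.desc.find('\n');
        while (end != std::string::npos) {
            printf("%s\n%34s", o.desc.substr(start, end - start).c_str(), "");
            start = end + 1;
            end   = o.desc.find('\n', start);
        }
        printf("%s\n", o.desc.substr(start).c_str());
    }
    return 0;
}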
@@ -1549,6 +1582,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     printf("                        print token count every N tokens (default: %d)\n", params.n_print);
     printf("      --check-tensors   check model tensor data for invalid values\n");
     printf("\n");
+
 #ifndef LOG_DISABLE_LOGS
     log_print_usage();
 #endif // LOG_DISABLE_LOGS
@@ -1611,24 +1645,6 @@ std::string string_get_sortable_timestamp() {
     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
 
-std::string string_random_prompt(std::mt19937 & rng) {
-    const int r = rng() % 10;
-    switch (r) {
-        case 0: return "So";
-        case 1: return "Once upon a time";
-        case 2: return "When";
-        case 3: return "The";
-        case 4: return "After";
-        case 5: return "If";
-        case 6: return "import";
-        case 7: return "He";
-        case 8: return "She";
-        case 9: return "They";
-    }
-
-    GGML_UNREACHABLE();
-}
-
 void string_process_escapes(std::string & input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;
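Note that `string_random_prompt` is removed together with the `--random-prompt` flag, so out-of-tree callers that relied on it now have to supply their own starter prompt. A sketch reproducing the deleted behavior, should anyone still need it (not part of this commit):

#include <random>
#include <string>

// drop-in replacement for the removed string_random_prompt(), choosing
// from the same ten starter strings the deleted helper used
static std::string random_prompt(std::mt19937 & rng) {
    static const char * starters[] = {
        "So", "Once upon a time", "When", "The", "After",
        "If", "import", "He", "She", "They",
    };
    return starters[rng() % 10];
}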
@@ -2906,7 +2922,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str());
     fprintf(stream, "instruct: %s # default: false\n", params.instruct ? "true" : "false");
     fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
-    fprintf(stream, "interactive_specials: %s # default: false\n", params.interactive_specials ? "true" : "false");
     fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
     fprintf(stream, "keep: %d # default: 0\n", params.n_keep);
     fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str());
@@ -2956,7 +2971,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false");
     fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? "true" : "false");
     yaml_dump_vector_int(stream, "prompt_tokens", prompt_tokens);
-    fprintf(stream, "random_prompt: %s # default: false\n", params.random_prompt ? "true" : "false");
     fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat);
 
     fprintf(stream, "reverse_prompt:\n");

common/common.h

Lines changed: 14 additions & 17 deletions
@@ -99,23 +99,23 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model = ""; // model path
-    std::string model_draft = ""; // draft model for speculative decoding
+    std::string model             = ""; // model path
+    std::string model_draft       = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
-    std::string model_url = ""; // model url to download
-    std::string hf_repo = ""; // HF repo
-    std::string hf_file = ""; // HF file
+    std::string model_url         = ""; // model url to download
+    std::string hf_repo           = ""; // HF repo
+    std::string hf_file           = ""; // HF file
     std::string prompt = "";
-    std::string prompt_file = ""; // store the external prompt file name
-    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix = ""; // string to prefix user inputs with
-    std::string input_suffix = ""; // string to suffix user inputs with
-    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
-    std::string logdir = ""; // directory in which to save YAML log files
+    std::string prompt_file       = ""; // store the external prompt file name
+    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
+    std::string input_prefix      = ""; // string to prefix user inputs with
+    std::string input_suffix      = ""; // string to suffix user inputs with
+    std::string logdir            = ""; // directory in which to save YAML log files
     std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
     std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
-    std::string logits_file = ""; // file for saving *all* logits
+    std::string logits_file       = ""; // file for saving *all* logits
 
+    std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
 
     // TODO: avoid tuple, use struct
@@ -143,19 +143,17 @@ struct gpt_params {
     bool kl_divergence    = false; // compute KL divergence
 
     bool usage            = false; // print usage
-    bool random_prompt    = false; // do not randomize prompt if none provided
     bool use_color        = false; // use color to distinguish generations and inputs
-    bool interactive      = false; // interactive mode
-    bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
     bool special          = false; // enable special token output
+    bool interactive      = false; // interactive mode
+    bool interactive_first = false; // wait for user input immediately
     bool conversation     = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool chatml           = false; // chatml mode (used for models trained on chatml syntax)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro  = false; // open the prompt cache read-only and do not update it
 
     bool embedding        = false; // get only sentence embedding
     bool escape           = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
-    bool interactive_first = false; // wait for user input immediately
     bool multiline_input  = false; // reverse the usage of `\`
     bool simple_io        = false; // improves compatibility with subprocesses and limited consoles
     bool cont_batching    = true;  // insert new sequences for decoding on-the-fly
@@ -200,7 +198,6 @@ std::vector<std::string> string_split(std::string input, char separator);
 
 std::string string_strip(const std::string & str);
 std::string string_get_sortable_timestamp();
-std::string string_random_prompt(std::mt19937 & rng);
 
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);

examples/embedding/embedding.cpp

Lines changed: 0 additions & 3 deletions
@@ -80,9 +80,6 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     llama_backend_init();
     llama_numa_init(params.numa);

examples/eval-callback/eval-callback.cpp

Lines changed: 0 additions & 3 deletions
@@ -152,9 +152,6 @@ int main(int argc, char ** argv) {
     print_build_info();
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     llama_backend_init();
     llama_numa_init(params.numa);

examples/imatrix/imatrix.cpp

Lines changed: 0 additions & 3 deletions
@@ -598,9 +598,6 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     sparams.dataset = params.prompt_file;
     g_collector.set_parameters(std::move(sparams));

0 commit comments