@@ -289,7 +289,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
289
289
invalid_param = true ;
290
290
return true ;
291
291
}
292
- // This is temporary, in the future the samplign state will be moved fully to llama_sampling_context.
292
+ // TODO: this is temporary, in the future the sampling state will be moved fully to llama_sampling_context.
293
293
params.seed = std::stoul (argv[i]);
294
294
sparams.seed = std::stoul (argv[i]);
295
295
return true ;
@@ -901,19 +901,15 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
901
901
params.interactive = true ;
902
902
return true ;
903
903
}
904
- if (arg == " --interactive-specials" ) {
905
- params.interactive_specials = true ;
906
- return true ;
907
- }
908
- if (arg == " --special" ) {
904
+ if (arg == " -sp" || arg == " --special" ) {
909
905
params.special = true ;
910
906
return true ;
911
907
}
912
908
if (arg == " --embedding" ) {
913
909
params.embedding = true ;
914
910
return true ;
915
911
}
916
- if (arg == " --interactive-first" ) {
912
+ if (arg == " -if" || arg == " --interactive-first" ) {
917
913
params.interactive_first = true ;
918
914
return true ;
919
915
}
@@ -965,7 +961,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
965
961
params.flash_attn = true ;
966
962
return true ;
967
963
}
968
- if (arg == " --color" ) {
964
+ if (arg == " -co" || arg == " --color" ) {
969
965
params.use_color = true ;
970
966
return true ;
971
967
}
@@ -1252,10 +1248,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
1252
1248
fprintf (stderr, " built with %s for %s\n " , LLAMA_COMPILER, LLAMA_BUILD_TARGET);
1253
1249
exit (0 );
1254
1250
}
1255
- if (arg == " --random-prompt" ) {
1256
- params.random_prompt = true ;
1257
- return true ;
1258
- }
1259
1251
if (arg == " --in-prefix-bos" ) {
1260
1252
params.input_prefix_bos = true ;
1261
1253
return true ;
@@ -1349,6 +1341,16 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
1349
1341
return false ;
1350
1342
}
1351
1343
1344
+ #ifdef __GNUC__
1345
+ #ifdef __MINGW32__
1346
+ #define LLAMA_COMMON_ATTRIBUTE_FORMAT (...) __attribute__((format(gnu_printf, __VA_ARGS__)))
1347
+ #else
1348
+ #define LLAMA_COMMON_ATTRIBUTE_FORMAT (...) __attribute__((format(printf, __VA_ARGS__)))
1349
+ #endif
1350
+ #else
1351
+ #define LLAMA_COMMON_ATTRIBUTE_FORMAT (...)
1352
+ #endif
1353
+
1352
1354
void gpt_params_print_usage (int /* argc*/ , char ** argv, const gpt_params & params) {
1353
1355
const llama_sampling_params & sparams = params.sparams ;
1354
1356
@@ -1360,52 +1362,83 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
1360
1362
}
1361
1363
sampler_type_names.pop_back ();
1362
1364
1365
+ struct option_info {
1366
+ LLAMA_COMMON_ATTRIBUTE_FORMAT (4 , 5 )
1367
+ option_info (const std::string & tags, const char * args, const char * desc, ...) : tags(tags), args(args), desc(desc) {
1368
+ va_list args_list;
1369
+ va_start (args_list, desc);
1370
+ char buffer[1024 ];
1371
+ vsnprintf (buffer, sizeof (buffer), desc, args_list);
1372
+ va_end (args_list);
1373
+ this ->desc = buffer;
1374
+ }
1375
+
1376
+ std::string tags;
1377
+ std::string args;
1378
+ std::string desc;
1379
+ };
1380
+
1381
+ std::vector<option_info> options;
1382
+
1383
+ // TODO: filter by tags
1384
+
1385
+ options.push_back ({ " *" , " -h, --help, --usage" , " print usage and exit" });
1386
+ options.push_back ({ " *" , " --version" , " show version and build info" });
1387
+ options.push_back ({ " *" , " -co, --color" , " colorise output to distinguish prompt and user input from generations (default: %s)" , params.use_color ? " true" : " false" });
1388
+ options.push_back ({ " *" , " -s, --seed SEED" , " RNG seed (default: %d, use random seed for < 0)" , params.seed });
1389
+ options.push_back ({ " *" , " -t, --threads N" , " number of threads to use during generation (default: %d)" , params.n_threads });
1390
+ options.push_back ({ " *" , " -tb, --threads-batch N" , " number of threads to use during batch and prompt processing (default: same as --threads)" });
1391
+ options.push_back ({ " speculative" , " -td, --threads-draft N" , " number of threads to use during generation (default: same as --threads)" });
1392
+ options.push_back ({ " speculative" , " -tbd, --threads-batch-draft N" , " number of threads to use during batch and prompt processing (default: same as --threads-draft)" });
1393
+ options.push_back ({ " *" , " -c, --ctx-size N" , " size of the prompt context (default: %d, 0 = loaded from model)" , params.n_ctx });
1394
+ options.push_back ({ " *" , " -n, --n-predict N" , " number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)" , params.n_predict });
1395
+ options.push_back ({ " *" , " -b, --batch-size N" , " logical maximum batch size (default: %d)" , params.n_batch });
1396
+ options.push_back ({ " *" , " -ub, --ubatch-size N" , " physical maximum batch size (default: %d)" , params.n_ubatch });
1397
+ options.push_back ({ " *" , " -p, --prompt PROMPT" , " prompt to start generation with (default: empty)" });
1398
+ options.push_back ({ " *" , " -f, --file FNAME" , " a file containing the prompt (default: none)" });
1399
+ options.push_back ({ " *" , " -bf, --binary-file FNAME" , " binary file containing the prompt (default: none)" });
1400
+ options.push_back ({ " *" , " -e, --escape" , " process escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )" });
1401
+ options.push_back ({ " main" , " --prompt-cache FNAME" , " file to cache prompt state for faster startup (default: none)" });
1402
+ options.push_back ({ " main" , " --prompt-cache-all" , " if specified, saves user input and generations to cache as well\n not supported with --interactive or other interactive options" });
1403
+ options.push_back ({ " main" , " --prompt-cache-ro" , " if specified, uses the prompt cache but does not update it" });
1404
+ options.push_back ({ " main" , " -r, --reverse-prompt PROMPT" , " halt generation at PROMPT, return control in interactive mode\n can be specified more than once for multiple prompts" });
1405
+
1406
+ options.push_back ({ " main" , " -sp, --special" , " special tokens output enabled (default: %s)" , params.special ? " true" : " false" });
1407
+ options.push_back ({ " main" , " -cnv, --conversation" , " run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)" , params.conversation ? " true" : " false" });
1408
+ options.push_back ({ " main" , " -ins, --instruct" , " run in instruction mode (use with Alpaca models) (default: %s)" , params.instruct ? " true" : " false" });
1409
+ options.push_back ({ " main" , " -cml, --chatml" , " run in chatml mode (use with ChatML-compatible models) (default: %s)" , params.chatml ? " true" : " false" });
1410
+ options.push_back ({ " main infill" , " -i, --interactive" , " run in interactive mode (default: %s)" , params.interactive ? " true" : " false" });
1411
+ options.push_back ({ " main infill" , " -if, --interactive-first" , " run in interactive mode and wait for input right away (default: %s)" , params.interactive_first ? " true" : " false" });
1412
+ options.push_back ({ " main infill" , " -mli, --multiline-input" , " allows you to write or paste multiple lines without ending each in '\\ '" });
1413
+ options.push_back ({ " main infill" , " --in-prefix-bos" , " prefix BOS to user inputs, preceding the `--in-prefix` string" });
1414
+ options.push_back ({ " main infill" , " --in-prefix STRING" , " string to prefix user inputs with (default: empty)" });
1415
+ options.push_back ({ " main infill" , " --in-suffix STRING" , " string to suffix after user inputs with (default: empty)" });
1416
+
1363
1417
printf (" \n " );
1364
1418
printf (" usage: %s [options]\n " , argv[0 ]);
1365
1419
printf (" \n " );
1366
- printf (" options:\n " );
1367
- printf (" -h, --help, --usage print usage and exit\n " );
1368
- printf (" --version show version and build info\n " );
1369
- printf (" -i, --interactive run in interactive mode\n " );
1370
- printf (" --special special tokens output enabled\n " );
1371
- printf (" --interactive-specials allow special tokens in user text, in interactive mode\n " );
1372
- printf (" --interactive-first run in interactive mode and wait for input right away\n " );
1373
- printf (" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n " );
1374
- printf (" -ins, --instruct run in instruction mode (use with Alpaca models)\n " );
1375
- printf (" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n " );
1376
- printf (" --multiline-input allows you to write or paste multiple lines without ending each in '\\ '\n " );
1377
- printf (" -r PROMPT, --reverse-prompt PROMPT\n " );
1378
- printf (" halt generation at PROMPT, return control in interactive mode\n " );
1379
- printf (" (can be specified more than once for multiple prompts).\n " );
1380
- printf (" --color colorise output to distinguish prompt and user input from generations\n " );
1381
- printf (" -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n " );
1382
- printf (" -t N, --threads N number of threads to use during generation (default: %d)\n " , params.n_threads );
1383
- printf (" -tb N, --threads-batch N\n " );
1384
- printf (" number of threads to use during batch and prompt processing (default: same as --threads)\n " );
1385
- printf (" -td N, --threads-draft N" );
1386
- printf (" number of threads to use during generation (default: same as --threads)\n " );
1387
- printf (" -tbd N, --threads-batch-draft N\n " );
1388
- printf (" number of threads to use during batch and prompt processing (default: same as --threads-draft)\n " );
1389
- printf (" -p PROMPT, --prompt PROMPT\n " );
1390
- printf (" prompt to start generation with (default: empty)\n " );
1391
- printf (" -e, --escape process prompt escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )\n " );
1392
- printf (" --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n " );
1393
- printf (" --prompt-cache-all if specified, saves user input and generations to cache as well.\n " );
1394
- printf (" not supported with --interactive or other interactive options\n " );
1395
- printf (" --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n " );
1396
- printf (" --random-prompt start with a randomized prompt.\n " );
1397
- printf (" --in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string\n " );
1398
- printf (" --in-prefix STRING string to prefix user inputs with (default: empty)\n " );
1399
- printf (" --in-suffix STRING string to suffix after user inputs with (default: empty)\n " );
1400
- printf (" -f FNAME, --file FNAME\n " );
1401
- printf (" prompt file to start generation.\n " );
1402
- printf (" -bf FNAME, --binary-file FNAME\n " );
1403
- printf (" binary file containing multiple choice tasks.\n " );
1404
- printf (" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n " , params.n_predict );
1405
- printf (" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n " , params.n_ctx );
1406
- printf (" -b N, --batch-size N logical maximum batch size (default: %d)\n " , params.n_batch );
1407
- printf (" -ub N, --ubatch-size N\n " );
1408
- printf (" physical maximum batch size (default: %d)\n " , params.n_ubatch );
1420
+ printf (" options:\n\n " );
1421
+
1422
+ for (const auto & o : options) {
1423
+ printf (" %-32s" , o.args .c_str ());
1424
+ if (o.args .length () > 34 ) {
1425
+ printf (" \n %34s" , " " );
1426
+ }
1427
+
1428
+ // printf("%s\n", o.desc.c_str());
1429
+ // print line by line and pad with spaces
1430
+ const auto desc = o.desc ;
1431
+ size_t start = 0 ;
1432
+ size_t end = desc.find (' \n ' );
1433
+ while (end != std::string::npos) {
1434
+ printf (" %s\n %34s" , desc.substr (start, end - start).c_str (), " " );
1435
+ start = end + 1 ;
1436
+ end = desc.find (' \n ' , start);
1437
+ }
1438
+
1439
+ printf (" %s\n " , desc.substr (start).c_str ());
1440
+ }
1441
+
1409
1442
printf (" --samplers samplers that will be used for generation in the order, separated by \' ;\'\n " );
1410
1443
printf (" (default: %s)\n " , sampler_type_names.c_str ());
1411
1444
printf (" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n " , sampler_type_chars.c_str ());
@@ -1549,6 +1582,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
1549
1582
printf (" print token count every N tokens (default: %d)\n " , params.n_print );
1550
1583
printf (" --check-tensors check model tensor data for invalid values\n " );
1551
1584
printf (" \n " );
1585
+
1552
1586
#ifndef LOG_DISABLE_LOGS
1553
1587
log_print_usage ();
1554
1588
#endif // LOG_DISABLE_LOGS
@@ -1611,24 +1645,6 @@ std::string string_get_sortable_timestamp() {
1611
1645
return std::string (timestamp_no_ns) + " ." + std::string (timestamp_ns);
1612
1646
}
1613
1647
1614
- std::string string_random_prompt (std::mt19937 & rng) {
1615
- const int r = rng () % 10 ;
1616
- switch (r) {
1617
- case 0 : return " So" ;
1618
- case 1 : return " Once upon a time" ;
1619
- case 2 : return " When" ;
1620
- case 3 : return " The" ;
1621
- case 4 : return " After" ;
1622
- case 5 : return " If" ;
1623
- case 6 : return " import" ;
1624
- case 7 : return " He" ;
1625
- case 8 : return " She" ;
1626
- case 9 : return " They" ;
1627
- }
1628
-
1629
- GGML_UNREACHABLE ();
1630
- }
1631
-
1632
1648
void string_process_escapes (std::string & input) {
1633
1649
std::size_t input_len = input.length ();
1634
1650
std::size_t output_idx = 0 ;
@@ -2906,7 +2922,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
2906
2922
yaml_dump_string_multiline (stream, " in_suffix" , params.input_prefix .c_str ());
2907
2923
fprintf (stream, " instruct: %s # default: false\n " , params.instruct ? " true" : " false" );
2908
2924
fprintf (stream, " interactive: %s # default: false\n " , params.interactive ? " true" : " false" );
2909
- fprintf (stream, " interactive_specials: %s # default: false\n " , params.interactive_specials ? " true" : " false" );
2910
2925
fprintf (stream, " interactive_first: %s # default: false\n " , params.interactive_first ? " true" : " false" );
2911
2926
fprintf (stream, " keep: %d # default: 0\n " , params.n_keep );
2912
2927
fprintf (stream, " logdir: %s # default: unset (no logging)\n " , params.logdir .c_str ());
@@ -2956,7 +2971,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
2956
2971
fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
2957
2972
fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
2958
2973
yaml_dump_vector_int (stream, " prompt_tokens" , prompt_tokens);
2959
- fprintf (stream, " random_prompt: %s # default: false\n " , params.random_prompt ? " true" : " false" );
2960
2974
fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
2961
2975
2962
2976
fprintf (stream, " reverse_prompt:\n " );
0 commit comments