1
1
#include " common.h"
2
+ #include " build-info.h"
3
+ #include " llama.h"
2
4
5
+ #include < algorithm>
3
6
#include < cassert>
4
- #include < iostream >
7
+ #include < cmath >
5
8
#include < cstring>
9
+ #include < ctime>
6
10
#include < fstream>
7
- #include < string>
8
11
#include < iterator>
9
- #include < algorithm>
12
+ #include < iostream>
13
+ #include < regex>
10
14
#include < sstream>
15
+ #include < string>
16
+ #include < sys/stat.h>
11
17
#include < unordered_set>
12
- #include < regex >
18
+ #include < vector >
13
19
14
20
#if defined(__APPLE__) && defined(__MACH__)
15
21
#include < sys/types.h>
@@ -93,7 +99,6 @@ void process_escapes(std::string& input) {
93
99
94
100
bool gpt_params_parse (int argc, char ** argv, gpt_params & params) {
95
101
bool invalid_param = false ;
96
- bool escape_prompt = false ;
97
102
std::string arg;
98
103
gpt_params default_params;
99
104
const std::string arg_prefix = " --" ;
@@ -125,8 +130,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
125
130
break ;
126
131
}
127
132
params.prompt = argv[i];
128
- } else if (arg == " -e" ) {
129
- escape_prompt = true ;
133
+ } else if (arg == " -e" || arg == " --escape " ) {
134
+ params. escape = true ;
130
135
} else if (arg == " --prompt-cache" ) {
131
136
if (++i >= argc) {
132
137
invalid_param = true ;
@@ -415,6 +420,16 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
415
420
break ;
416
421
}
417
422
params.antiprompt .push_back (argv[i]);
423
+ } else if (arg == " -ld" || arg == " --logdir" ) {
424
+ if (++i >= argc) {
425
+ invalid_param = true ;
426
+ break ;
427
+ }
428
+ params.logdir = argv[i];
429
+
430
+ if (params.logdir .back () != ' /' ) {
431
+ params.logdir += " /" ;
432
+ }
418
433
} else if (arg == " --perplexity" ) {
419
434
params.perplexity = true ;
420
435
} else if (arg == " --hellaswag" ) {
@@ -508,7 +523,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
508
523
exit (1 );
509
524
}
510
525
511
- if (escape_prompt ) {
526
+ if (params. escape ) {
512
527
process_escapes (params.prompt );
513
528
process_escapes (params.input_prefix );
514
529
process_escapes (params.input_suffix );
@@ -534,7 +549,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
534
549
fprintf (stdout, " -t N, --threads N number of threads to use during computation (default: %d)\n " , params.n_threads );
535
550
fprintf (stdout, " -p PROMPT, --prompt PROMPT\n " );
536
551
fprintf (stdout, " prompt to start generation with (default: empty)\n " );
537
- fprintf (stdout, " -e process prompt escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )\n " );
552
+ fprintf (stdout, " -e, --escape process prompt escapes sequences (\\ n, \\ r, \\ t, \\ ', \\\" , \\\\ )\n " );
538
553
fprintf (stdout, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n " );
539
554
fprintf (stdout, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n " );
540
555
fprintf (stdout, " not supported with --interactive or other interactive options\n " );
@@ -613,6 +628,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
613
628
fprintf (stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n " );
614
629
fprintf (stdout, " -m FNAME, --model FNAME\n " );
615
630
fprintf (stdout, " model path (default: %s)\n " , params.model .c_str ());
631
+ fprintf (stdout, " -ld LOGDIR, --logdir LOGDIR\n " );
632
+ fprintf (stdout, " path under which to save YAML logs (no logging if unset)\n " );
616
633
fprintf (stdout, " \n " );
617
634
}
618
635
@@ -764,3 +781,242 @@ std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token
764
781
return std::string (result.data (), result.size ());
765
782
}
766
783
784
// Recursively creates `path` and any missing parent directories (like `mkdir -p`).
// Returns true if the directory hierarchy exists afterwards, false otherwise.
// NOTE(review): uses POSIX stat/mkdir — presumably only built on POSIX targets; confirm for Windows.
bool create_directory_with_parents(const std::string & path) {
    // if the full path already exists, just check whether it's a directory
    struct stat info;
    if (stat(path.c_str(), &info) == 0) {
        return S_ISDIR(info.st_mode);
    }

    size_t pos_slash = 1; // skip leading slashes for directory creation

    // process path from front to back, procedurally creating directories
    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
        const std::string subpath = path.substr(0, pos_slash);
        struct stat sub_info;

        // if the sub-path already exists, ensure that it's a directory
        if (stat(subpath.c_str(), &sub_info) == 0) {
            if (!S_ISDIR(sub_info.st_mode)) {
                return false;
            }
        } else {
            // create parent directory
            if (mkdir(subpath.c_str(), 0755) != 0) {
                return false;
            }
        }

        pos_slash += 1;
    }

    // the loop above only creates components up to each '/'; when the path
    // does not end with a slash the final directory still has to be created
    if (!path.empty() && path.back() != '/') {
        if (mkdir(path.c_str(), 0755) != 0) {
            return false;
        }
    }

    return true;
}
818
+
819
+ void dump_vector_float_yaml (FILE * stream, const char * prop_name, const std::vector<float > & data) {
820
+ fprintf (stream, " %s: [" , prop_name);
821
+ for (size_t i = 0 ; i < data.size () - 1 ; ++i) {
822
+ fprintf (stream, " %e, " , data[i]);
823
+ }
824
+ fprintf (stream, " %e]\n " , data.back ());
825
+ }
826
+
827
+ void dump_vector_int_yaml (FILE * stream, const char * prop_name, const std::vector<int > & data) {
828
+ fprintf (stream, " %s: [" , prop_name);
829
+ for (size_t i = 0 ; i < data.size () - 1 ; ++i) {
830
+ fprintf (stream, " %d, " , data[i]);
831
+ }
832
+ fprintf (stream, " %d]\n " , data.back ());
833
+ }
834
+
835
// Writes `data` to `stream` as the YAML value of key `prop_name`:
//  - empty value            -> "key:\n"
//  - leading/trailing space -> double-quoted, with newlines/quotes escaped
//  - contains newlines      -> YAML literal block ("key: |")
//  - otherwise              -> plain scalar
// If `remove_first` is set, the first character of `data` is dropped
// (used by callers for prompts that carry a leading sentinel space).
void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data,
                                const bool remove_first) {
    std::string data_str(data == NULL ? "" : data);

    if (data_str.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    if (remove_first) {
        data_str = data_str.substr(1);
    }

    size_t pos_start = 0;
    size_t pos_found = 0;

    // cast to unsigned char: std::isspace is UB for negative char values (e.g. UTF-8 bytes)
    if (!data_str.empty() && (std::isspace((unsigned char) data_str[0]) || std::isspace((unsigned char) data_str.back()))) {
        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
        data_str = "\"" + data_str + "\"";
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;
    }

    if (data_str.find('\n') == std::string::npos) {
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;
    }

    fprintf(stream, "%s: |\n", prop_name);
    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found - pos_start).c_str());
        pos_start = pos_found + 1;
    }

    // emit the remainder after the last newline - previously this final
    // segment was silently dropped. (The string cannot end in '\n' here:
    // that case is whitespace-terminated and handled by the quoted branch.)
    if (pos_start < data_str.size()) {
        fprintf(stream, "  %s\n", data_str.substr(pos_start).c_str());
    }
}
870
+
871
// Returns the current local time as a lexicographically sortable timestamp
// string: "YYYY-MM-DDTHH:MM:SS.nnnnnnnnn" (nanosecond fractional part).
std::string get_sortable_timestamp() {
    using clock = std::chrono::system_clock;

    const clock::time_point current_time = clock::now();
    const time_t as_time_t = clock::to_time_t(current_time);
    char timestamp_no_ns[100];
    std::strftime(timestamp_no_ns, 100, "%Y-%m-%dT%H:%M:%S", std::localtime(&as_time_t));

    // sub-second part: nanoseconds within the current second, zero-padded to 9 digits
    const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        current_time.time_since_epoch() % 1000000000).count();
    // 9 digits + '\0' -> 10 bytes needed; previously the buffer was 10 bytes but
    // snprintf was told 11, risking a one-byte overrun. Use sizeof() so they agree.
    char timestamp_ns[11];
    // cast to long long: "%ld" is the wrong conversion for int64_t on 32-bit/LLP64 targets
    snprintf(timestamp_ns, sizeof(timestamp_ns), "%09lld", (long long) ns);

    return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
}
886
+
887
+ void dump_non_result_info_yaml (FILE * stream, const gpt_params & params, const llama_context * lctx,
888
+ const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model) {
889
+ fprintf (stream, " build_commit: %s\n " , BUILD_COMMIT);
890
+ fprintf (stream, " build_number: %d\n " , BUILD_NUMBER);
891
+ fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
892
+ fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
893
+ fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
894
+ fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
895
+ fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
896
+ fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
897
+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
898
+ fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
899
+ fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
900
+ fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
901
+ fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
902
+ fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
903
+ fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
904
+ fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
905
+ fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
906
+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
907
+ fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
908
+ fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
909
+
910
+ #ifdef NDEBUG
911
+ fprintf (stream, " debug: false\n " );
912
+ #else
913
+ fprintf (stream, " debug: true\n " );
914
+ #endif // NDEBUG
915
+
916
+ fprintf (stream, " model: %s\n " , model);
917
+
918
+ #ifdef __OPTIMIZE__
919
+ fprintf (stream, " optimize: true\n " );
920
+ #else
921
+ fprintf (stream, " optimize: false\n " );
922
+ #endif // __OPTIMIZE__
923
+
924
+ fprintf (stream, " time: %s\n " , timestamp.c_str ());
925
+
926
+ fprintf (stream, " \n " );
927
+ fprintf (stream, " ###############\n " );
928
+ fprintf (stream, " # User Inputs #\n " );
929
+ fprintf (stream, " ###############\n " );
930
+ fprintf (stream, " \n " );
931
+
932
+ fprintf (stream, " batch_size: %d # default: 512\n " , params.n_batch );
933
+ dump_string_yaml_multiline (stream, " cfg_negative_prompt" , params.cfg_negative_prompt .c_str (), true );
934
+ fprintf (stream, " cfg_scale: %f # default: 1.0\n " , params.cfg_scale );
935
+ fprintf (stream, " chunks: %d # default: -1 (unlimited)\n " , params.n_chunks );
936
+ fprintf (stream, " color: %s # default: false\n " , params.use_color ? " true" : " false" );
937
+ fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
938
+ fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
939
+ fprintf (stream, " export: %s # default: false\n " , params.export_cgraph ? " true" : " false" );
940
+ fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
941
+ fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , params.frequency_penalty );
942
+ dump_string_yaml_multiline (stream, " grammar" , params.grammar .c_str (), true );
943
+ fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
944
+ fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
945
+ fprintf (stream, " hellaswag_tasks: %ld # default: 400\n " , params.hellaswag_tasks );
946
+
947
+ const auto logit_bias_eos = params.logit_bias .find (llama_token_eos (lctx));
948
+ const bool ignore_eos = logit_bias_eos != params.logit_bias .end () && logit_bias_eos->second == -INFINITY;
949
+ fprintf (stream, " ignore_eos: %s # default: false\n " , ignore_eos ? " true" : " false" );
950
+
951
+ fprintf (stream, " instruct: %s # default: false\n " , params.instruct ? " true" : " false" );
952
+ fprintf (stream, " interactive: %s # default: false\n " , params.interactive ? " true" : " false" );
953
+ fprintf (stream, " interactive_first: %s # default: false\n " , params.interactive_first ? " true" : " false" );
954
+ dump_string_yaml_multiline (stream, " in_prefix" , params.input_prefix .c_str (), false );
955
+ fprintf (stream, " in_prefix_bos: %s # default: false\n " , params.input_prefix_bos ? " true" : " false" );
956
+ dump_string_yaml_multiline (stream, " in_suffix" , params.input_prefix .c_str (), false );
957
+ fprintf (stream, " keep: %d # default: 0\n " , params.n_keep );
958
+ fprintf (stream, " logdir: %s # default: unset (no logging)\n " , params.logdir .c_str ());
959
+
960
+ fprintf (stream, " logit_bias:\n " );
961
+ for (std::pair<llama_token, float > lb : params.logit_bias ) {
962
+ if (ignore_eos && lb.first == logit_bias_eos->first ) {
963
+ continue ;
964
+ }
965
+ fprintf (stream, " %d: %f" , lb.first , lb.second );
966
+ }
967
+
968
+ fprintf (stream, " lora: %s\n " , params.lora_adapter .c_str ());
969
+ fprintf (stream, " lora_base: %s\n " , params.lora_base .c_str ());
970
+ fprintf (stream, " low_vram: %s # default: false\n " , params.low_vram ? " true" : " false" );
971
+ fprintf (stream, " main_gpu: %d # default: 0\n " , params.main_gpu );
972
+ fprintf (stream, " mirostat: %d # default: 0 (disabled)\n " , params.mirostat );
973
+ fprintf (stream, " mirostat_ent: %f # default: 5.0\n " , params.mirostat_tau );
974
+ fprintf (stream, " mirostat_lr: %f # default: 0.1\n " , params.mirostat_eta );
975
+ fprintf (stream, " mtest: %s # default: false\n " , params.mem_test ? " true" : " false" );
976
+ fprintf (stream, " mul_mat_q: %s # default: false\n " , params.mul_mat_q ? " true" : " false" );
977
+ fprintf (stream, " memory_f32: %s # default: false\n " , !params.memory_f16 ? " true" : " false" );
978
+ fprintf (stream, " mlock: %s # default: false\n " , params.use_mlock ? " true" : " false" );
979
+ fprintf (stream, " model: %s # default: models/7B/ggml-model.bin\n " , params.model .c_str ());
980
+ fprintf (stream, " model_alias: %s # default: unknown\n " , params.model_alias .c_str ());
981
+ fprintf (stream, " multiline_input: %s # default: false\n " , params.multiline_input ? " true" : " false" );
982
+ fprintf (stream, " n_gpu_layers: %d # default: 0\n " , params.n_gpu_layers );
983
+ fprintf (stream, " n_predict: %d # default: -1 (unlimited)\n " , params.n_predict );
984
+ fprintf (stream, " no_mmap: %s # default: false\n " , !params.use_mmap ? " true" : " false" );
985
+ fprintf (stream, " no_penalize_nl: %s # default: false\n " , !params.penalize_nl ? " true" : " false" );
986
+ fprintf (stream, " numa: %s # default: false\n " , params.numa ? " true" : " false" );
987
+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , params.presence_penalty );
988
+ dump_string_yaml_multiline (stream, " prompt" , params.prompt .c_str (), true );
989
+ fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
990
+ fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
991
+ fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
992
+ dump_vector_int_yaml (stream, " prompt_tokens" , prompt_tokens);
993
+ fprintf (stream, " random_prompt: %s # default: false\n " , params.random_prompt ? " true" : " false" );
994
+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , params.repeat_penalty );
995
+
996
+ fprintf (stream, " reverse_prompt:\n " );
997
+ for (std::string ap : params.antiprompt ) {
998
+ size_t pos = 0 ;
999
+ while ((pos = ap.find (' \n ' , pos)) != std::string::npos) {
1000
+ ap.replace (pos, 1 , " \\ n" );
1001
+ pos += 1 ;
1002
+ }
1003
+
1004
+ fprintf (stream, " - %s\n " , ap.c_str ());
1005
+ }
1006
+
1007
+ fprintf (stream, " rope_freq_base: %f # default: 10000.0\n " , params.rope_freq_base );
1008
+ fprintf (stream, " rope_freq_scale: %f # default: 1.0\n " , params.rope_freq_scale );
1009
+ fprintf (stream, " seed: %d # default: -1 (random seed)\n " , params.seed );
1010
+ fprintf (stream, " simple_io: %s # default: false\n " , params.simple_io ? " true" : " false" );
1011
+
1012
+ const std::vector<float > tensor_split_vector (params.tensor_split , params.tensor_split + LLAMA_MAX_DEVICES);
1013
+ dump_vector_float_yaml (stream, " tensor_split" , tensor_split_vector);
1014
+
1015
+ fprintf (stream, " temp: %f # default: 0.8\n " , params.temp );
1016
+ fprintf (stream, " threads: %d # default: %d\n " , params.n_threads , std::thread::hardware_concurrency ());
1017
+ fprintf (stream, " tfs: %f # default: 1.0\n " , params.tfs_z );
1018
+ fprintf (stream, " top_k: %d # default: 40\n " , params.top_k );
1019
+ fprintf (stream, " top_p: %f # default: 0.95\n " , params.top_p );
1020
+ fprintf (stream, " typical_p: %f # default: 1.0\n " , params.typical_p );
1021
+ fprintf (stream, " verbose_prompt: %s # default: false\n " , params.verbose_prompt ? " true" : " false" );
1022
+ }
0 commit comments