@@ -731,41 +731,12 @@ void sigint_handler(int signo) {
 }
 #endif
 
-const char * llama_print_system_info(void) {
-    static std::string s;
-
-    s = "";
-    s += "AVX = "       + std::to_string(ggml_cpu_has_avx())       + " | ";
-    s += "AVX2 = "      + std::to_string(ggml_cpu_has_avx2())      + " | ";
-    s += "AVX512 = "    + std::to_string(ggml_cpu_has_avx512())    + " | ";
-    s += "FMA = "       + std::to_string(ggml_cpu_has_fma())       + " | ";
-    s += "NEON = "      + std::to_string(ggml_cpu_has_neon())      + " | ";
-    s += "ARM_FMA = "   + std::to_string(ggml_cpu_has_arm_fma())   + " | ";
-    s += "F16C = "      + std::to_string(ggml_cpu_has_f16c())      + " | ";
-    s += "FP16_VA = "   + std::to_string(ggml_cpu_has_fp16_va())   + " | ";
-    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
-    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
-    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
-    s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";
-
-    return s.c_str();
-}
-
-int llama_main(int argc, char ** argv) {
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    gpt_params params;
-    params.model = "models/llama-7B/ggml-model.bin";
-
-    if (gpt_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
-    }
+int llama_main(
+    gpt_params params,
+    gpt_vocab vocab,
+    llama_model model,
+    int64_t t_load_us,
+    int64_t t_main_start_us) {
 
     if (params.seed < 0) {
         params.seed = time(NULL);
@@ -781,30 +752,6 @@ int llama_main(int argc, char ** argv) {
     // params.prompt = R"(// this function checks if the number n is prime
     // bool is_prime(int n) {)";
 
-    int64_t t_load_us = 0;
-
-    gpt_vocab vocab;
-    llama_model model;
-
-    // load the model
-    {
-        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-        const int64_t t_start_us = ggml_time_us();
-        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-
-        t_load_us = ggml_time_us() - t_start_us;
-    }
-
-    // print system information
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
-    }
-
     int n_past = 0;
 
     int64_t t_sample_us = 0;
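
Net effect of the two hunks: argument parsing, model loading, and the system-info report leave llama_main(), which now expects a ready gpt_params, gpt_vocab, and llama_model plus the load and start timestamps. A minimal caller sketch under that assumption follows; the corresponding main.cpp is not part of this excerpt, so the placement is illustrative and reuses only calls visible in the removed code.

// Hypothetical caller sketch: the code removed from llama_main() above,
// reassembled in a main() that hands pre-loaded state to the new signature.
// (Includes for gpt_params, llama_model, etc. omitted; the header layout is
// not shown in this diff.)
int main(int argc, char ** argv) {
    ggml_time_init();
    const int64_t t_main_start_us = ggml_time_us();

    gpt_params params;
    params.model = "models/llama-7B/ggml-model.bin";
    if (gpt_params_parse(argc, argv, params) == false) {
        return 1;
    }

    gpt_vocab vocab;
    llama_model model;
    int64_t t_load_us = 0;

    // load the model up front, timing it exactly as the removed block did
    {
        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
        const int64_t t_start_us = ggml_time_us();
        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
            return 1;
        }
        t_load_us = ggml_time_us() - t_start_us;
    }

    // llama_main() now only runs inference on state prepared by its caller
    return llama_main(params, vocab, model, t_load_us, t_main_start_us);
}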