@@ -6859,22 +6859,68 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
6859
6859
LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
6860
6860
LLAMA_LOG_INFO("%s: model ftype = %s\n", __func__, llama_model_ftype_name(model.ftype).c_str());
6861
6861
if (ml.n_elements >= 1e12) {
6862
- LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
6862
+ LLAMA_LOG_INFO("%s: model params = %.3f T\n", __func__, ml.n_elements*1e-12);
6863
6863
} else if (ml.n_elements >= 1e9) {
6864
- LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
6864
+ LLAMA_LOG_INFO("%s: model params = %.3f B\n", __func__, ml.n_elements*1e-9);
6865
6865
} else if (ml.n_elements >= 1e6) {
6866
- LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
6866
+ LLAMA_LOG_INFO("%s: model params = %.3f M\n", __func__, ml.n_elements*1e-6);
6867
6867
} else {
6868
- LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
6869
- }
6870
-
6871
- LLAMA_LOG_INFO("%s: model size = %.2f Bytes (%.2f BPW) \n", __func__, ml.n_bytes/1.0, ml.n_bytes*8.0/ml.n_elements);
6872
- LLAMA_LOG_INFO("%s: model size = %.2f KB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0, ml.n_bytes*8.0/ml.n_elements);
6873
- LLAMA_LOG_INFO("%s: model size = %.2f KiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0, ml.n_bytes*8.0/ml.n_elements);
6874
- LLAMA_LOG_INFO("%s: model size = %.2f MB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0 , ml.n_bytes*8.0/ml.n_elements);
6875
- LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6876
- LLAMA_LOG_INFO("%s: model size = %.2f GB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
6877
- LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6868
+ LLAMA_LOG_INFO("%s: model params = %.3f K\n", __func__, ml.n_elements*1e-3);
6869
+ // LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
6870
+ // } else if (ml.n_elements >= 1e9) {
6871
+ // LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
6872
+ // } else if (ml.n_elements >= 1e6) {
6873
+ // LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
6874
+ // } else {
6875
+ // LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
6876
+ }
6877
+
6878
+ LLAMA_LOG_INFO("%s: model size = %.2f Bytes (%.3f BPW) \n", __func__, ml.n_bytes/1.0, ml.n_bytes*8.0/ml.n_elements);
6879
+ LLAMA_LOG_INFO("%s: model size = %.2f KB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0, ml.n_bytes*8.0/ml.n_elements);
6880
+ LLAMA_LOG_INFO("%s: model size = %.2f KiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0, ml.n_bytes*8.0/ml.n_elements);
6881
+ LLAMA_LOG_INFO("%s: model size = %.2f MB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0 , ml.n_bytes*8.0/ml.n_elements);
6882
+ LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6883
+ LLAMA_LOG_INFO("%s: model size = %.2f GB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
6884
+ LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6885
+
6886
+ // if (ml.n_bytes < GiB) {
6887
+ // LLAMA_LOG_INFO("%s: model size = %.3f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6888
+ // } else {
6889
+ // LLAMA_LOG_INFO("%s: model size = %.3f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
6890
+ // }
6891
+
6892
+ {
6893
+ auto n_bytes = ml.n_bytes;
6894
+ auto n_elements = ml.n_elements;
6895
+ auto meta_tke = ml.get_tensor_meta("token_embd.weight");
6896
+ auto meta_out = ml.get_tensor_meta("output.weight");
6897
+ if (meta_tke && meta_out) {
6898
+ n_bytes -= ggml_nbytes(meta_tke);
6899
+ n_elements -= ggml_nelements(meta_tke);
6900
+ n_bytes -= ggml_nbytes(meta_out);
6901
+ n_elements -= ggml_nelements(meta_out);
6902
+
6903
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f Bytes (%.3f BPW) \n", __func__, n_bytes/1.0, n_bytes*8.0/n_elements);
6904
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f KB (%.3f BPW) \n", __func__, n_bytes/1000.0, n_bytes*8.0/n_elements);
6905
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f KiB (%.3f BPW) \n", __func__, n_bytes/1024.0, n_bytes*8.0/n_elements);
6906
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f MB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0, n_bytes*8.0/n_elements);
6907
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f MiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
6908
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f GB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
6909
+ LLAMA_LOG_INFO("%s: repeating layers = %.2f GiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
6910
+
6911
+ // if (n_bytes < GiB) {
6912
+ // LLAMA_LOG_INFO("%s: repeating layers = %.3f MiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
6913
+ // } else {
6914
+ // LLAMA_LOG_INFO("%s: repeating layers = %.3f GiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
6915
+ // }
6916
+
6917
+ if (ml.n_elements >= 1e9) {
6918
+ LLAMA_LOG_INFO(", %.3f B parameters)\n", n_elements*1e-9);
6919
+ } else {
6920
+ LLAMA_LOG_INFO(", %.3f M parameters)\n", n_elements*1e-6);
6921
+ }
6922
+ }
6923
+ }
6878
6924
6879
6925
// general kv
6880
6926
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, model.name.c_str());
0 commit comments