@@ -927,6 +927,7 @@ enum e_model {
927
927
928
928
// Byte-size units built from powers of 1024 (binary multiples).
// GB is used below as the threshold for reporting model size in MiB vs GiB.
static const size_t kB = 1024 ; // 1024 bytes
929
929
static const size_t MB = kB *kB ; // 1024 * 1024 bytes
930
+ static const size_t GB = kB *kB *kB ; // 1024 * 1024 * 1024 bytes
930
931
931
932
// default hparams (LLaMA 7B)
932
933
struct llama_hparams {
@@ -1280,6 +1281,7 @@ struct llama_model_loader {
1280
1281
int n_created = 0 ;
1281
1282
1282
1283
int64_t n_elements = 0 ;
1284
+ size_t n_bytes = 0 ;
1283
1285
1284
1286
bool use_mmap = false ;
1285
1287
@@ -1312,6 +1314,7 @@ struct llama_model_loader {
1312
1314
const char * name = gguf_get_tensor_name (ctx_gguf, i);
1313
1315
struct ggml_tensor * t = ggml_get_tensor (ctx_meta, name);
1314
1316
n_elements += ggml_nelements (t);
1317
+ n_bytes += ggml_nbytes (t);
1315
1318
}
1316
1319
1317
1320
LLAMA_LOG_INFO (" %s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n " ,
@@ -1909,7 +1912,12 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
1909
1912
LLAMA_LOG_INFO (" %s: freq_scale = %g\n " , __func__, hparams.rope_freq_scale );
1910
1913
LLAMA_LOG_INFO (" %s: model type = %s\n " , __func__, llama_model_type_name (model.type ));
1911
1914
LLAMA_LOG_INFO (" %s: model ftype = %s\n " , __func__, llama_model_ftype_name (model.ftype ).c_str ());
1912
- LLAMA_LOG_INFO (" %s: model size = %.2f B\n " , __func__, ml.n_elements *1e-9 );
1915
+ LLAMA_LOG_INFO (" %s: model params = %.2f B\n " , __func__, ml.n_elements *1e-9 );
1916
+ if (ml.n_bytes < GB) {
1917
+ LLAMA_LOG_INFO (" %s: model size = %.2f MiB (%.2f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1918
+ } else {
1919
+ LLAMA_LOG_INFO (" %s: model size = %.2f GiB (%.2f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1920
+ }
1913
1921
1914
1922
// general kv
1915
1923
LLAMA_LOG_INFO (" %s: general.name = %s\n " , __func__, model.name .c_str ());
@@ -3495,7 +3503,7 @@ static struct ggml_cgraph * llm_build_starcoder(
3495
3503
3496
3504
ggml_allocr_alloc (lctx.alloc , token);
3497
3505
if (!ggml_allocr_is_measure (lctx.alloc )) {
3498
- memcpy (token->data , embd, N * n_embd * ggml_element_size (inpL ));
3506
+ memcpy (token->data , embd, N * n_embd * ggml_element_size (token ));
3499
3507
}
3500
3508
}
3501
3509
0 commit comments