@@ -19714,8 +19714,23 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
        gguf_free(c);
    }

- LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
19718
- LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);
19717
+ LLAMA_LOG_INFO("\n===========================================\n");
19718
+ LLAMA_LOG_INFO("%s: model size = %8.2f B\n", __func__, total_size_org/1.0);
19719
+ LLAMA_LOG_INFO("%s: model size = %8.2f KB\n", __func__, total_size_org/1000.0);
19720
+ LLAMA_LOG_INFO("%s: model size = %8.2f KiB\n", __func__, total_size_org/1024.0);
19721
+ LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1000.0/1000.0);
19722
+ LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
19723
+ LLAMA_LOG_INFO("%s: model size = %8.2f GB\n", __func__, total_size_org/1000.0/1000.0/1000.0);
19724
+ LLAMA_LOG_INFO("%s: model size = %8.2f GiB\n", __func__, total_size_org/1024.0/1024.0/1024.0);
19725
+ LLAMA_LOG_INFO("===========================================\n");
19726
+ LLAMA_LOG_INFO("%s: quant size = %8.2f B\n", __func__, total_size_new/1.0);
19727
+ LLAMA_LOG_INFO("%s: quant size = %8.2f KB\n", __func__, total_size_new/1000.0);
19728
+ LLAMA_LOG_INFO("%s: quant size = %8.2f KiB\n", __func__, total_size_new/1024.0);
19729
+ LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1000.0/1000.0);
19730
+ LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
19731
+ LLAMA_LOG_INFO("%s: quant size = %8.2f GB\n", __func__, total_size_new/1000.0/1000.0/1000.0);
19732
+ LLAMA_LOG_INFO("%s: quant size = %8.2f GiB\n", __func__, total_size_new/1024.0/1024.0/1024.0);
+    LLAMA_LOG_INFO("===========================================\n");

    if (qs.n_fallback > 0) {
        LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",