@@ -14,25 +14,25 @@ struct quant_option {
14
14
};
15
15
16
16
static const std::vector<struct quant_option > QUANT_OPTIONS = {
17
- { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 3.50G, +0.2499 ppl @ 7B" , },
18
- { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1846 ppl @ 7B" , },
19
- { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 4.30G, +0.0796 ppl @ 7B" , },
20
- { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0415 ppl @ 7B" , },
17
+ { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B" , },
18
+ { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B" , },
19
+ { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B" , },
20
+ { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0349 ppl @ LLaMA-v1-7B" , },
21
21
#ifdef GGML_USE_K_QUANTS
22
- { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.67G, +0.8698 ppl @ 7B" , },
22
+ { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B" , },
23
23
{ " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
24
- { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5505 ppl @ 7B" , },
25
- { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.06G, +0.2437 ppl @ 7B" , },
26
- { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1803 ppl @ 7B" , },
24
+ { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5551 ppl @ LLaMA-v1-7B" , },
25
+ { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.07G, +0.2496 ppl @ LLaMA-v1-7B" , },
26
+ { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1764 ppl @ LLaMA-v1-7B" , },
27
27
{ " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
28
- { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.56G, +0.1149 ppl @ 7B" , },
29
- { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B" , },
28
+ { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.59G, +0.0992 ppl @ LLaMA-v1-7B" , },
29
+ { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0532 ppl @ LLaMA-v1-7B" , },
30
30
{ " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
31
- { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0353 ppl @ 7B" , },
32
- { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0142 ppl @ 7B" , },
33
- { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0044 ppl @ 7B" , },
31
+ { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0400 ppl @ LLaMA-v1-7B" , },
32
+ { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B" , },
33
+ { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, -0.0008 ppl @ LLaMA-v1-7B" , },
34
34
#endif
35
- { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ 7B" , },
35
+ { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ LLaMA-v1-7B" , },
36
36
{ " F16" , LLAMA_FTYPE_MOSTLY_F16, " 13.00G @ 7B" , },
37
37
{ " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
38
38
};
0 commit comments