-namespace LLama.Native
+namespace LLama.Native
 {
     /// <summary>
     /// Supported model file types
@@ -9,176 +9,191 @@ public enum LLamaFtype
         /// All f32
         /// </summary>
         /// <remarks>Benchmark@7B: 26GB</remarks>
-        LLAMA_FTYPE_ALL_F32 = 0,
+        ALL_F32 = 0,

         /// <summary>
         /// Mostly f16
         /// </summary>
         /// <remarks>Benchmark@7B: 13GB</remarks>
-        LLAMA_FTYPE_MOSTLY_F16 = 1,
+        MOSTLY_F16 = 1,

         /// <summary>
         /// Mostly 8 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,
+        MOSTLY_Q8_0 = 7,

         /// <summary>
         /// Mostly 4 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,
+        MOSTLY_Q4_0 = 2,

         /// <summary>
         /// Mostly 4 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,
+        MOSTLY_Q4_1 = 3,

         /// <summary>
         /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,
+        MOSTLY_Q4_1_SOME_F16 = 4,

         /// <summary>
         /// Mostly 5 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 4.30GB @ 7B tokens, +0.0796 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,
+        MOSTLY_Q5_0 = 8,

         /// <summary>
         /// Mostly 5 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,
+        MOSTLY_Q5_1 = 9,

         /// <summary>
         /// K-Quant 2 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 2.67GB @ 7B parameters, +0.8698 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q2_K = 10,
+        MOSTLY_Q2_K = 10,

         /// <summary>
         /// K-Quant 3 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,
+        MOSTLY_Q3_K_S = 11,

         /// <summary>
         /// K-Quant 3 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,
+        MOSTLY_Q3_K_M = 12,

         /// <summary>
         /// K-Quant 3 bit (Large)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,
+        MOSTLY_Q3_K_L = 13,

         /// <summary>
         /// K-Quant 4 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,
+        MOSTLY_Q4_K_S = 14,

         /// <summary>
         /// K-Quant 4 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,
+        MOSTLY_Q4_K_M = 15,

         /// <summary>
         /// K-Quant 5 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,
+        MOSTLY_Q5_K_S = 16,

         /// <summary>
         /// K-Quant 5 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,
+        MOSTLY_Q5_K_M = 17,

         /// <summary>
         /// K-Quant 6 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q6_K = 18,
+        MOSTLY_Q6_K = 18,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,
+        MOSTLY_IQ2_XXS = 19,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,
+        MOSTLY_IQ2_XS = 20,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,
+        MOSTLY_Q2_K_S = 21,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_K_XS = 22,
+        MOSTLY_IQ3_K_XS = 22,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,
+        MOSTLY_IQ3_XXS = 23,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ1_S = 24,
+        MOSTLY_IQ1_S = 24,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ4_NL = 25,
+        MOSTLY_IQ4_NL = 25,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_S = 26,
+        MOSTLY_IQ3_S = 26,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_M = 27,
+        MOSTLY_IQ3_M = 27,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_S = 28,
+        MOSTLY_IQ2_S = 28,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_M = 29,
+        MOSTLY_IQ2_M = 29,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ4_XS = 30,
+        MOSTLY_IQ4_XS = 30,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ1_M = 31,
+        MOSTLY_IQ1_M = 31,

         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_BF16 = 32,
+        MOSTLY_BF16 = 32,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_4_4 = 33,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_4_8 = 34,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_8_8 = 35,

         /// <summary>
         /// File type was not specified
         /// </summary>
-        LLAMA_FTYPE_GUESSED = 1024
+        GUESSED = 1024
     }
 }
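
For context, a minimal usage sketch (not part of this commit; the program scaffolding here is assumed for illustration) showing how a caller references the renamed members. Only the enum member names change; the underlying numeric values stay the same:

using System;
using LLama.Native;

internal static class FtypeDemo
{
    private static void Main()
    {
        // Previously LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M; after this commit the
        // shorter member name is used. The numeric value is still 15.
        var ftype = LLamaFtype.MOSTLY_Q4_K_M;
        Console.WriteLine($"{ftype} = {(int)ftype}"); // prints "MOSTLY_Q4_K_M = 15"
    }
}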