6
6
typedef uint16_t ggml_half;
7
7
typedef uint32_t ggml_half2;
8
8
9
- #define GGML_COMMON_AGGR
9
+ #define GGML_COMMON_AGGR_U
10
+ #define GGML_COMMON_AGGR_S
10
11
11
12
#define GGML_COMMON_DECL
12
13
#elif defined(GGML_COMMON_DECL_CPP)
@@ -15,7 +16,8 @@ typedef uint32_t ggml_half2;
15
16
typedef uint16_t ggml_half;
16
17
typedef uint32_t ggml_half2;
17
18
18
- #define GGML_COMMON_AGGR data
19
+ #define GGML_COMMON_AGGR_U data
20
+ #define GGML_COMMON_AGGR_S data
19
21
20
22
#define GGML_COMMON_DECL
21
23
#elif defined(GGML_COMMON_DECL_METAL)
@@ -24,7 +26,8 @@ typedef uint32_t ggml_half2;
24
26
typedef half ggml_half;
25
27
typedef half2 ggml_half2;
26
28
27
- #define GGML_COMMON_AGGR
29
+ #define GGML_COMMON_AGGR_U
30
+ #define GGML_COMMON_AGGR_S
28
31
29
32
#define GGML_COMMON_DECL
30
33
#elif defined(GGML_COMMON_DECL_CUDA)
@@ -38,7 +41,8 @@ typedef half2 ggml_half2;
38
41
typedef half ggml_half;
39
42
typedef half2 ggml_half2;
40
43
41
- #define GGML_COMMON_AGGR data
44
+ #define GGML_COMMON_AGGR_U
45
+ #define GGML_COMMON_AGGR_S data
42
46
43
47
#define GGML_COMMON_DECL
44
48
#elif defined(GGML_COMMON_DECL_HIP)
@@ -48,7 +52,8 @@ typedef half2 ggml_half2;
48
52
typedef half ggml_half;
49
53
typedef half2 ggml_half2;
50
54
51
- #define GGML_COMMON_AGGR data
55
+ #define GGML_COMMON_AGGR_U
56
+ #define GGML_COMMON_AGGR_S data
52
57
53
58
#define GGML_COMMON_DECL
54
59
#elif defined(GGML_COMMON_DECL_SYCL)
@@ -58,7 +63,8 @@ typedef half2 ggml_half2;
58
63
typedef sycl::half ggml_half;
59
64
typedef sycl::half2 ggml_half2;
60
65
61
- #define GGML_COMMON_AGGR data
66
+ #define GGML_COMMON_AGGR_U
67
+ #define GGML_COMMON_AGGR_S data
62
68
63
69
#define GGML_COMMON_DECL
64
70
#endif
@@ -163,9 +169,9 @@ typedef struct {
163
169
struct {
164
170
ggml_half d; // delta
165
171
ggml_half m; // min
166
- } GGML_COMMON_AGGR ;
172
+ } GGML_COMMON_AGGR_S ;
167
173
ggml_half2 dm;
168
- };
174
+ } GGML_COMMON_AGGR_U ;
169
175
uint8_t qs[QK4_1 / 2 ]; // nibbles / quants
170
176
} block_q4_1;
171
177
static_assert (sizeof (block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -184,9 +190,9 @@ typedef struct {
184
190
struct {
185
191
ggml_half d; // delta
186
192
ggml_half m; // min
187
- } GGML_COMMON_AGGR ;
193
+ } GGML_COMMON_AGGR_S ;
188
194
ggml_half2 dm;
189
- };
195
+ } GGML_COMMON_AGGR_U ;
190
196
uint8_t qh[4 ]; // 5-th bit of quants
191
197
uint8_t qs[QK5_1 / 2 ]; // nibbles / quants
192
198
} block_q5_1;
@@ -205,9 +211,9 @@ typedef struct {
205
211
struct {
206
212
ggml_half d; // delta
207
213
ggml_half s; // d * sum(qs[i])
208
- } GGML_COMMON_AGGR ;
214
+ } GGML_COMMON_AGGR_S ;
209
215
ggml_half2 ds;
210
- };
216
+ } GGML_COMMON_AGGR_U ;
211
217
int8_t qs[QK8_1]; // quants
212
218
} block_q8_1;
213
219
static_assert (sizeof (block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
@@ -270,9 +276,9 @@ typedef struct {
270
276
struct {
271
277
ggml_half d; // super-block scale for quantized scales
272
278
ggml_half dmin; // super-block scale for quantized mins
273
- } GGML_COMMON_AGGR ;
279
+ } GGML_COMMON_AGGR_S ;
274
280
ggml_half2 dm;
275
- };
281
+ } GGML_COMMON_AGGR_U ;
276
282
} block_q2_K;
277
283
static_assert (sizeof (block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
278
284
@@ -297,9 +303,9 @@ typedef struct {
297
303
struct {
298
304
ggml_half d; // super-block scale for quantized scales
299
305
ggml_half dmin; // super-block scale for quantized mins
300
- } GGML_COMMON_AGGR ;
306
+ } GGML_COMMON_AGGR_S ;
301
307
ggml_half2 dm;
302
- };
308
+ } GGML_COMMON_AGGR_U ;
303
309
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
304
310
uint8_t qs[QK_K/2 ]; // 4--bit quants
305
311
} block_q4_K;
@@ -314,9 +320,9 @@ typedef struct {
314
320
struct {
315
321
ggml_half d; // super-block scale for quantized scales
316
322
ggml_half dmin; // super-block scale for quantized mins
317
- } GGML_COMMON_AGGR ;
323
+ } GGML_COMMON_AGGR_S ;
318
324
ggml_half2 dm;
319
- };
325
+ } GGML_COMMON_AGGR_U ;
320
326
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
321
327
uint8_t qh[QK_K/8 ]; // quants, high bit
322
328
uint8_t qs[QK_K/2 ]; // quants, low 4 bits
0 commit comments