66typedef uint16_t ggml_half;
77typedef uint32_t ggml_half2;
88
9- #define GGML_COMMON_AGGR
9+ #define GGML_COMMON_AGGR_U
10+ #define GGML_COMMON_AGGR_S
1011
1112#define GGML_COMMON_DECL
1213#elif defined(GGML_COMMON_DECL_CPP)
@@ -15,7 +16,8 @@ typedef uint32_t ggml_half2;
1516typedef uint16_t ggml_half;
1617typedef uint32_t ggml_half2;
1718
18- #define GGML_COMMON_AGGR data
19+ #define GGML_COMMON_AGGR_U data
20+ #define GGML_COMMON_AGGR_S data
1921
2022#define GGML_COMMON_DECL
2123#elif defined(GGML_COMMON_DECL_METAL)
@@ -24,7 +26,8 @@ typedef uint32_t ggml_half2;
2426typedef half ggml_half;
2527typedef half2 ggml_half2;
2628
27- #define GGML_COMMON_AGGR
29+ #define GGML_COMMON_AGGR_U
30+ #define GGML_COMMON_AGGR_S
2831
2932#define GGML_COMMON_DECL
3033#elif defined(GGML_COMMON_DECL_CUDA)
@@ -38,7 +41,8 @@ typedef half2 ggml_half2;
3841typedef half ggml_half;
3942typedef half2 ggml_half2;
4043
41- #define GGML_COMMON_AGGR data
44+ #define GGML_COMMON_AGGR_U
45+ #define GGML_COMMON_AGGR_S data
4246
4347#define GGML_COMMON_DECL
4448#elif defined(GGML_COMMON_DECL_HIP)
@@ -48,7 +52,8 @@ typedef half2 ggml_half2;
4852typedef half ggml_half;
4953typedef half2 ggml_half2;
5054
51- #define GGML_COMMON_AGGR data
55+ #define GGML_COMMON_AGGR_U
56+ #define GGML_COMMON_AGGR_S data
5257
5358#define GGML_COMMON_DECL
5459#elif defined(GGML_COMMON_DECL_SYCL)
@@ -58,7 +63,8 @@ typedef half2 ggml_half2;
5863typedef sycl::half ggml_half;
5964typedef sycl::half2 ggml_half2;
6065
61- #define GGML_COMMON_AGGR data
66+ #define GGML_COMMON_AGGR_U
67+ #define GGML_COMMON_AGGR_S data
6268
6369#define GGML_COMMON_DECL
6470#endif
@@ -163,9 +169,9 @@ typedef struct {
163169 struct {
164170 ggml_half d; // delta
165171 ggml_half m; // min
166- } GGML_COMMON_AGGR ;
172+ } GGML_COMMON_AGGR_S ;
167173 ggml_half2 dm;
168- };
174+ } GGML_COMMON_AGGR_U ;
169175 uint8_t qs[QK4_1 / 2 ]; // nibbles / quants
170176} block_q4_1;
171177static_assert (sizeof (block_q4_1) == 2 * sizeof (ggml_half) + QK4_1 / 2 , " wrong q4_1 block size/padding" );
@@ -184,9 +190,9 @@ typedef struct {
184190 struct {
185191 ggml_half d; // delta
186192 ggml_half m; // min
187- } GGML_COMMON_AGGR ;
193+ } GGML_COMMON_AGGR_S ;
188194 ggml_half2 dm;
189- };
195+ } GGML_COMMON_AGGR_U ;
190196 uint8_t qh[4 ]; // 5-th bit of quants
191197 uint8_t qs[QK5_1 / 2 ]; // nibbles / quants
192198} block_q5_1;
@@ -205,9 +211,9 @@ typedef struct {
205211 struct {
206212 ggml_half d; // delta
207213 ggml_half s; // d * sum(qs[i])
208- } GGML_COMMON_AGGR ;
214+ } GGML_COMMON_AGGR_S ;
209215 ggml_half2 ds;
210- };
216+ } GGML_COMMON_AGGR_U ;
211217 int8_t qs[QK8_1]; // quants
212218} block_q8_1;
213219static_assert (sizeof (block_q8_1) == 2 *sizeof (ggml_half) + QK8_1, " wrong q8_1 block size/padding" );
@@ -270,9 +276,9 @@ typedef struct {
270276 struct {
271277 ggml_half d; // super-block scale for quantized scales
272278 ggml_half dmin; // super-block scale for quantized mins
273- } GGML_COMMON_AGGR ;
279+ } GGML_COMMON_AGGR_S ;
274280 ggml_half2 dm;
275- };
281+ } GGML_COMMON_AGGR_U ;
276282} block_q2_K;
277283static_assert (sizeof (block_q2_K) == 2 *sizeof (ggml_half) + QK_K/16 + QK_K/4 , " wrong q2_K block size/padding" );
278284
@@ -297,9 +303,9 @@ typedef struct {
297303 struct {
298304 ggml_half d; // super-block scale for quantized scales
299305 ggml_half dmin; // super-block scale for quantized mins
300- } GGML_COMMON_AGGR ;
306+ } GGML_COMMON_AGGR_S ;
301307 ggml_half2 dm;
302- };
308+ } GGML_COMMON_AGGR_U ;
303309 uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
304310 uint8_t qs[QK_K/2 ]; // 4--bit quants
305311} block_q4_K;
@@ -314,9 +320,9 @@ typedef struct {
314320 struct {
315321 ggml_half d; // super-block scale for quantized scales
316322 ggml_half dmin; // super-block scale for quantized mins
317- } GGML_COMMON_AGGR ;
323+ } GGML_COMMON_AGGR_S ;
318324 ggml_half2 dm;
319- };
325+ } GGML_COMMON_AGGR_U ;
320326 uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
321327 uint8_t qh[QK_K/8 ]; // quants, high bit
322328 uint8_t qs[QK_K/2 ]; // quants, low 4 bits
0 commit comments