@@ -37,6 +37,23 @@ typedef struct {
3737} block_q4_3;
3838static_assert (sizeof (block_q4_3) == 2 * sizeof (ggml_fp16_t ) + QK4_3 / 2 , " wrong q4_3 block size/padding" );
3939
40+ #define QK5_0 32
41+ typedef struct {
42+ __half d; // delta
43+ uint8_t qh[4 ]; // 5-th bit of quants
44+ uint8_t qs[QK5_0 / 2 ]; // nibbles / quants
45+ } block_q5_0;
46+ static_assert (sizeof (block_q5_0) == sizeof (ggml_fp16_t ) + sizeof (uint32_t ) + QK5_0 / 2 , " wrong q5_0 block size/padding" );
47+
48+ #define QK5_1 32
49+ typedef struct {
50+ __half d; // delta
51+ __half m; // min
52+ uint32_t qh; // 5-th bit of quants
53+ uint8_t qs[QK5_1 / 2 ]; // nibbles / quants
54+ } block_q5_1;
55+ static_assert (sizeof (block_q5_1) == 2 * sizeof (ggml_fp16_t ) + sizeof (uint32_t ) + QK5_1 / 2 , " wrong q5_1 block size/padding" );
56+
4057#define QK8_0 32
4158typedef struct {
4259 float d; // delta
@@ -138,6 +155,64 @@ static __global__ void dequantize_block_q4_3(const void * vx, float * y) {
138155 }
139156}
140157
158+ static __global__ void dequantize_block_q5_0 (const void * vx, float * y) {
159+ const block_q5_0 * x = (const block_q5_0 *) vx;
160+
161+ const int i = blockIdx .x ;
162+
163+ const float d = x[i].d ;
164+
165+ const uint8_t * pp = x[i].qs ;
166+
167+ uint32_t qh;
168+ memcpy (&qh, x[i].qh , sizeof (qh));
169+
170+ for (int l = 0 ; l < QK5_0; l += 2 ) {
171+ const uint8_t vi = pp[l/2 ];
172+
173+ const int8_t vh0 = ((qh & (1 << (l + 0 ))) >> (l + 0 )) << 4 ;
174+ const int8_t vh1 = ((qh & (1 << (l + 1 ))) >> (l + 1 )) << 4 ;
175+
176+ const int8_t vi0 = ((vi & 0xf ) | vh0);
177+ const int8_t vi1 = ((vi >> 4 ) | vh1);
178+
179+ const float v0 = (vi0 - 16 )*d;
180+ const float v1 = (vi1 - 16 )*d;
181+
182+ y[i*QK5_0 + l + 0 ] = v0;
183+ y[i*QK5_0 + l + 1 ] = v1;
184+ }
185+ }
186+
187+ static __global__ void dequantize_block_q5_1 (const void * vx, float * y) {
188+ const block_q5_1 * x = (const block_q5_1 *) vx;
189+
190+ const int i = blockIdx .x ;
191+
192+ const float d = x[i].d ;
193+ const float m = x[i].m ;
194+
195+ const uint8_t * pp = x[i].qs ;
196+
197+ const uint32_t qh = x[i].qh ;
198+
199+ for (int l = 0 ; l < QK5_1; l += 2 ) {
200+ const uint8_t vi = pp[l/2 ];
201+
202+ const int8_t vh0 = ((qh & (1 << (l + 0 ))) >> (l + 0 )) << 4 ;
203+ const int8_t vh1 = ((qh & (1 << (l + 1 ))) >> (l + 1 )) << 4 ;
204+
205+ const int8_t vi0 = (vi & 0xf ) | vh0;
206+ const int8_t vi1 = (vi >> 4 ) | vh1;
207+
208+ const float v0 = vi0*d + m;
209+ const float v1 = vi1*d + m;
210+
211+ y[i*QK5_1 + l + 0 ] = v0;
212+ y[i*QK5_1 + l + 1 ] = v1;
213+ }
214+ }
215+
141216static __global__ void dequantize_block_q8_0 (const void * vx, float * y) {
142217 const block_q8_0 * x = (const block_q8_0 *) vx;
143218
@@ -174,6 +249,16 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
174249 dequantize_block_q4_3<<<nb, 1 , 0 , stream>>> (vx, y);
175250}
176251
252+ void dequantize_row_q5_0_cuda (const void * vx, float * y, int k, cudaStream_t stream) {
253+ const int nb = k / QK5_0;
254+ dequantize_block_q5_0<<<nb, 1 , 0 , stream>>> (vx, y);
255+ }
256+
257+ void dequantize_row_q5_1_cuda (const void * vx, float * y, int k, cudaStream_t stream) {
258+ const int nb = k / QK5_1;
259+ dequantize_block_q5_1<<<nb, 1 , 0 , stream>>> (vx, y);
260+ }
261+
177262void dequantize_row_q8_0_cuda (const void * vx, float * y, int k, cudaStream_t stream) {
178263 const int nb = k / QK8_0;
179264 dequantize_block_q8_0<<<nb, 1 , 0 , stream>>> (vx, y);
0 commit comments