@@ -37,6 +37,13 @@ typedef struct {
37
37
} block_q4_3;
38
38
static_assert (sizeof (block_q4_3) == 2 * sizeof (ggml_fp16_t ) + QK4_3 / 2 , " wrong q4_3 block size/padding" );
39
39
40
+ #define QK8_0 32
41
+ typedef struct {
42
+ float d; // delta
43
+ int8_t qs[QK8_0]; // quants
44
+ } block_q8_0;
45
+ static_assert (sizeof (block_q8_0) == sizeof (float ) + QK8_0, " wrong q8_0 block size/padding" );
46
+
40
47
static __global__ void dequantize_block_q4_0 (const void * vx, float * y) {
41
48
const block_q4_0 * x = (const block_q4_0 *) vx;
42
49
@@ -131,6 +138,22 @@ static __global__ void dequantize_block_q4_3(const void * vx, float * y) {
131
138
}
132
139
}
133
140
141
+ static __global__ void dequantize_block_q8_0 (const void * vx, float * y) {
142
+ const block_q8_0 * x = (const block_q8_0 *) vx;
143
+
144
+ const int i = blockIdx .x ;
145
+
146
+ const float d = x[i].d ;
147
+
148
+ const int8_t * pp = x[i].qs ;
149
+
150
+ for (int l = 0 ; l < QK8_0; l++) {
151
+ const int8_t vi = pp[l];
152
+
153
+ y[i*QK8_0 + l] = vi*d;
154
+ }
155
+ }
156
+
134
157
void dequantize_row_q4_0_cuda (const void * vx, float * y, int k, cudaStream_t stream) {
135
158
const int nb = k / QK4_0;
136
159
dequantize_block_q4_0<<<nb, 1 , 0 , stream>>> (vx, y);
@@ -151,6 +174,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
151
174
dequantize_block_q4_3<<<nb, 1 , 0 , stream>>> (vx, y);
152
175
}
153
176
177
+ void dequantize_row_q8_0_cuda (const void * vx, float * y, int k, cudaStream_t stream) {
178
+ const int nb = k / QK8_0;
179
+ dequantize_block_q8_0<<<nb, 1 , 0 , stream>>> (vx, y);
180
+ }
181
+
154
182
// buffer pool for cuda
155
183
#define MAX_CUDA_BUFFERS 16
156
184
0 commit comments