Skip to content

Commit 751fcfc

Browse files
authored
Vulkan IQ4_NL Support (#8613)
* Fix Vulkan matmul tests compile errors
* Add Vulkan IQ4_NL support
* Fix Vulkan DeepSeek-Coder-V2-Lite MoE support
1 parent 46e4741 commit 751fcfc

File tree

7 files changed

+219
-190
lines changed

7 files changed

+219
-190
lines changed

ggml/src/ggml-vulkan.cpp

Lines changed: 140 additions & 182 deletions
Large diffs are not rendered by default.

ggml/src/vulkan-shaders/dequant_funcs.comp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,11 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
5858
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
5959
}
6060
#endif
61+
62+
#if defined(DATA_A_IQ4_NL)
63+
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
64+
const float d = float(data_a[a_offset + ib].d);
65+
const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
66+
return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;
67+
}
68+
#endif

ggml/src/vulkan-shaders/dequant_iq4_nl.comp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#version 450
2+
3+
#include "dequant_head.comp"
4+
5+
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
6+
7+
layout (binding = 0) readonly buffer A {block_iq4_nl data_a[];};
8+
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
9+
10+
void main() {
11+
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
12+
13+
const uint tid = gl_LocalInvocationID.x % 64;
14+
const uint il = tid/32;
15+
const uint ir = tid%32;
16+
const uint ib = 32*i + ir;
17+
if (ib >= p.nel / 32) {
18+
return;
19+
}
20+
21+
const uint q_idx = 8*il;
22+
const uint b_idx = 1024*i + 32*ir + q_idx;
23+
24+
const float d = float(data_a[ib].d);
25+
26+
[[unroll]] for (uint l = 0; l < 8; ++l) {
27+
data_b[b_idx + l + 0] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] & 0xF]);
28+
data_b[b_idx + l + 16] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] >> 4]);
29+
}
30+
}

ggml/src/vulkan-shaders/dequant_q4_0.comp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,13 @@ void main() {
1818
return;
1919
}
2020

21-
const uint b_idx = 1024*i + 32*ir + 8*il;
21+
const uint q_idx = 8*il;
22+
const uint b_idx = 1024*i + 32*ir + q_idx;
2223

2324
const float d = float(data_a[ib].d);
24-
const float dm = -8.0f * d;
25-
26-
const uint q_idx = 8*il;
2725

2826
[[unroll]] for (uint l = 0; l < 8; ++l) {
29-
data_b[b_idx + l + 0] = D_TYPE(d * (data_a[ib].qs[q_idx + l] & 0xF) + dm);
30-
data_b[b_idx + l + 16] = D_TYPE(d * (data_a[ib].qs[q_idx + l] >> 4) + dm);
27+
data_b[b_idx + l + 0] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] & 0xF) - 8.0f));
28+
data_b[b_idx + l + 16] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] >> 4) - 8.0f));
3129
}
3230
}

ggml/src/vulkan-shaders/mul_mm.comp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ shared FLOAT_TYPE buf_a[BM * (BK+1)];
7171
shared FLOAT_TYPE buf_b[BN * (BK+1)];
7272

7373
#ifdef MUL_MAT_ID
74-
shared u16vec2 row_ids[2048];
74+
shared u16vec2 row_ids[3072];
7575
#endif
7676

7777
void main() {
@@ -380,6 +380,19 @@ void main() {
380380

381381
buf_a[buf_idx ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32));
382382
buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
383+
#elif defined(DATA_A_IQ4_NL)
384+
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
385+
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a;
386+
387+
const uint ib = idx / 16;
388+
const uint iqs = idx & 0xF;
389+
390+
const float d = float(data_a[ib].d);
391+
const uint vui = uint(data_a[ib].qs[iqs]);
392+
const vec2 v = vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;
393+
394+
buf_a[buf_idx ] = FLOAT_TYPE(v.x);
395+
buf_a[buf_idx + 16] = FLOAT_TYPE(v.y);
383396
#endif
384397
}
385398
[[unroll]] for (uint l = 0; l < BN; l += loadstride_b) {

ggml/src/vulkan-shaders/types.comp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,24 @@ struct block_q6_K
177177

178178
#define A_TYPE block_q6_K
179179
#endif
180+
181+
// IQuants
182+
183+
#if defined(DATA_A_IQ4_NL)
184+
#extension GL_EXT_shader_16bit_storage : require
185+
#define QUANT_K 32
186+
#define QUANT_R 2
187+
188+
struct block_iq4_nl
189+
{
190+
float16_t d;
191+
uint8_t qs[QUANT_K/2];
192+
};
193+
194+
#define A_TYPE block_iq4_nl
195+
196+
const int8_t kvalues_iq4nl[16] = {
197+
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
198+
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
199+
};
200+
#endif

ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ const std::vector<std::string> type_names = {
5252
"q3_k",
5353
"q4_k",
5454
"q5_k",
55-
"q6_k"
55+
"q6_k",
56+
"iq4_nl"
5657
};
5758

5859
void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {

0 commit comments

Comments
 (0)