Skip to content

Commit 2d8a56d

Browse files
0cc4m
authored and arthw committed
Vulkan MMQ Fix (ggml-org#8479)
* Fix incoherence by adding missing LOAD_VEC_A parameter
* Fix Vulkan op result checker build error
1 parent 09cc684 commit 2d8a56d

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

ggml/src/ggml-vulkan.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6561,7 +6561,7 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
65616561
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
65626562

65636563
vk_buffer buffer_gpu = extra->buffer_gpu.lock();
6564-
ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
6564+
ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
65656565
}
65666566

65676567
std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -6645,7 +6645,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66456645
for (int i3 = 0; i3 < src0->ne[3]; i3++) {
66466646
for (int i2 = 0; i2 < src0->ne[2]; i2++) {
66476647
const int idx = i3*src0->ne[2] + i2;
6648-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
6648+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
66496649
}
66506650
}
66516651

@@ -6658,7 +6658,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66586658
if (offset + src0_size >= buffer_gpu->size) {
66596659
src0_size = buffer_gpu->size - offset;
66606660
}
6661-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
6661+
ggml_vk_buffer_read(buffer_gpu, offset, src0_clone->data, src0_size);
66626662
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
66636663
}
66646664
} else {
@@ -6687,7 +6687,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66876687
for (int i3 = 0; i3 < src1->ne[3]; i3++) {
66886688
for (int i2 = 0; i2 < src1->ne[2]; i2++) {
66896689
const int idx = i3*src1->ne[2] + i2;
6690-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
6690+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
66916691
}
66926692
}
66936693

@@ -6700,7 +6700,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67006700
if (offset + src1_size >= buffer_gpu->size) {
67016701
src1_size = buffer_gpu->size - offset;
67026702
}
6703-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
6703+
ggml_vk_buffer_read(buffer_gpu, offset, src1_clone->data, src1_size);
67046704
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
67056705
}
67066706
} else {
@@ -6745,7 +6745,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67456745
for (int i3 = 0; i3 < src2->ne[3]; i3++) {
67466746
for (int i2 = 0; i2 < src2->ne[2]; i2++) {
67476747
const int idx = i3*src2->ne[2] + i2;
6748-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
6748+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
67496749
}
67506750
}
67516751

@@ -6758,7 +6758,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67586758
if (offset + src2_size >= buffer_gpu->size) {
67596759
src2_size = buffer_gpu->size - offset;
67606760
}
6761-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src2_clone->data, src2_size);
6761+
ggml_vk_buffer_read(buffer_gpu, offset, src2_clone->data, src2_size);
67626762
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
67636763
}
67646764
} else {
@@ -6922,7 +6922,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
69226922
tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
69236923
}
69246924

6925-
ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
6925+
ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
69266926
}
69276927

69286928
float first_error_result = -1.0f;

ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,10 @@ void matmul_shaders(std::vector<std::future<void>>& tasks, bool fp16, bool matmu
270270
std::string data_a_key = "DATA_A_" + to_uppercase(tname);
271271
std::string load_vec_a = (tname == "f32" || tname == "f16") ? load_vec : "2";
272272
tasks.push_back(std::async(std::launch::async, [=] {
273-
string_to_spv(shader_name + "_" + tname + "_f32", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16);
273+
string_to_spv(shader_name + "_" + tname + "_f32", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16);
274274
}));
275275
tasks.push_back(std::async(std::launch::async, [=] {
276-
string_to_spv(shader_name + "_" + tname + "_f32_aligned", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "2"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}}), fp16);
276+
string_to_spv(shader_name + "_" + tname + "_f32_aligned", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}}), fp16);
277277
}));
278278
}
279279
}

0 commit comments

Comments
 (0)