Skip to content

Commit 2d8a56d

Browse files
0cc4m
authored and arthw committed
Vulkan MMQ Fix (ggml-org#8479)
* Fix incoherence by adding missing LOAD_VEC_A parameter
* Fix Vulkan op result checker build error
1 parent 09cc684 commit 2d8a56d

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

ggml/src/ggml-vulkan.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6561,7 +6561,7 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
65616561
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
65626562

65636563
vk_buffer buffer_gpu = extra->buffer_gpu.lock();
6564-
ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
6564+
ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
65656565
}
65666566

65676567
std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -6645,7 +6645,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66456645
for (int i3 = 0; i3 < src0->ne[3]; i3++) {
66466646
for (int i2 = 0; i2 < src0->ne[2]; i2++) {
66476647
const int idx = i3*src0->ne[2] + i2;
6648-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
6648+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
66496649
}
66506650
}
66516651

@@ -6658,7 +6658,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66586658
if (offset + src0_size >= buffer_gpu->size) {
66596659
src0_size = buffer_gpu->size - offset;
66606660
}
6661-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
6661+
ggml_vk_buffer_read(buffer_gpu, offset, src0_clone->data, src0_size);
66626662
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
66636663
}
66646664
} else {
@@ -6687,7 +6687,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
66876687
for (int i3 = 0; i3 < src1->ne[3]; i3++) {
66886688
for (int i2 = 0; i2 < src1->ne[2]; i2++) {
66896689
const int idx = i3*src1->ne[2] + i2;
6690-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
6690+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
66916691
}
66926692
}
66936693

@@ -6700,7 +6700,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67006700
if (offset + src1_size >= buffer_gpu->size) {
67016701
src1_size = buffer_gpu->size - offset;
67026702
}
6703-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
6703+
ggml_vk_buffer_read(buffer_gpu, offset, src1_clone->data, src1_size);
67046704
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
67056705
}
67066706
} else {
@@ -6745,7 +6745,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67456745
for (int i3 = 0; i3 < src2->ne[3]; i3++) {
67466746
for (int i2 = 0; i2 < src2->ne[2]; i2++) {
67476747
const int idx = i3*src2->ne[2] + i2;
6748-
ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
6748+
ggml_vk_buffer_read(buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
67496749
}
67506750
}
67516751

@@ -6758,7 +6758,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
67586758
if (offset + src2_size >= buffer_gpu->size) {
67596759
src2_size = buffer_gpu->size - offset;
67606760
}
6761-
ggml_vk_buffer_read(ctx, buffer_gpu, offset, src2_clone->data, src2_size);
6761+
ggml_vk_buffer_read(buffer_gpu, offset, src2_clone->data, src2_size);
67626762
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
67636763
}
67646764
} else {
@@ -6922,7 +6922,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
69226922
tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
69236923
}
69246924

6925-
ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
6925+
ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
69266926
}
69276927

69286928
float first_error_result = -1.0f;

ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,10 @@ void matmul_shaders(std::vector<std::future<void>>& tasks, bool fp16, bool matmu
270270
std::string data_a_key = "DATA_A_" + to_uppercase(tname);
271271
std::string load_vec_a = (tname == "f32" || tname == "f16") ? load_vec : "2";
272272
tasks.push_back(std::async(std::launch::async, [=] {
273-
string_to_spv(shader_name + "_" + tname + "_f32", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16);
273+
string_to_spv(shader_name + "_" + tname + "_f32", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16);
274274
}));
275275
tasks.push_back(std::async(std::launch::async, [=] {
276-
string_to_spv(shader_name + "_" + tname + "_f32_aligned", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "2"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}}), fp16);
276+
string_to_spv(shader_name + "_" + tname + "_f32_aligned", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}}), fp16);
277277
}));
278278
}
279279
}

0 commit comments

Comments
 (0)