Skip to content

Commit 98725e8

Browse files
cebtenzzre, jordankanter
authored and committed
kompute : llama-bench support and ggml_cpu_has_kompute() (ggml-org#5226)
1 parent bd365c5 commit 98725e8

File tree

5 files changed

+23
-10
lines changed

5 files changed

+23
-10
lines changed

common/common.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
15211521
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
15221522
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
15231523
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
1524+
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
15241525
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
15251526
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
15261527
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");

examples/llama-bench/llama-bench.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ struct test {
563563
static const bool cuda;
564564
static const bool opencl;
565565
static const bool vulkan;
566+
static const bool kompute;
566567
static const bool metal;
567568
static const bool gpu_blas;
568569
static const bool blas;
@@ -647,6 +648,9 @@ struct test {
647648
if (vulkan) {
648649
return "Vulkan";
649650
}
651+
if (kompute) {
652+
return "Kompute";
653+
}
650654
if (metal) {
651655
return "Metal";
652656
}
@@ -662,7 +666,7 @@ struct test {
662666
static const std::vector<std::string> & get_fields() {
663667
static const std::vector<std::string> fields = {
664668
"build_commit", "build_number",
665-
"cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
669+
"cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas",
666670
"cpu_info", "gpu_info",
667671
"model_filename", "model_type", "model_size", "model_n_params",
668672
"n_batch", "n_threads", "type_k", "type_v",
@@ -686,8 +690,9 @@ struct test {
686690
field == "avg_ns" || field == "stddev_ns") {
687691
return INT;
688692
}
689-
if (field == "cuda" || field == "opencl" || field == "vulkan"|| field == "metal" || field == "gpu_blas" || field == "blas" ||
690-
field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
693+
if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
694+
field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" ||
695+
field == "mul_mat_q") {
691696
return BOOL;
692697
}
693698
if (field == "avg_ts" || field == "stddev_ts") {
@@ -714,7 +719,8 @@ struct test {
714719
}
715720
std::vector<std::string> values = {
716721
build_commit, std::to_string(build_number),
717-
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
722+
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(kompute),
723+
std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
718724
cpu_info, gpu_info,
719725
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
720726
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
@@ -743,6 +749,7 @@ const int test::build_number = LLAMA_BUILD_NUMBER;
743749
const bool test::cuda = !!ggml_cpu_has_cublas();
744750
const bool test::opencl = !!ggml_cpu_has_clblast();
745751
const bool test::vulkan = !!ggml_cpu_has_vulkan();
752+
const bool test::kompute = !!ggml_cpu_has_kompute();
746753
const bool test::metal = !!ggml_cpu_has_metal();
747754
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
748755
const bool test::blas = !!ggml_cpu_has_blas();

ggml.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20473,6 +20473,14 @@ int ggml_cpu_has_vulkan(void) {
2047320473
#endif
2047420474
}
2047520475

20476+
int ggml_cpu_has_kompute(void) {
20477+
#if defined(GGML_USE_KOMPUTE)
20478+
return 1;
20479+
#else
20480+
return 0;
20481+
#endif
20482+
}
20483+
2047620484
int ggml_cpu_has_sycl(void) {
2047720485
#if defined(GGML_USE_SYCL)
2047820486
return 1;
@@ -20482,7 +20490,8 @@ int ggml_cpu_has_sycl(void) {
2048220490
}
2048320491

2048420492
int ggml_cpu_has_gpublas(void) {
20485-
return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_sycl();
20493+
return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
20494+
ggml_cpu_has_sycl();
2048620495
}
2048720496

2048820497
int ggml_cpu_has_sse3(void) {

ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,7 @@ extern "C" {
22662266
GGML_API int ggml_cpu_has_cublas (void);
22672267
GGML_API int ggml_cpu_has_clblast (void);
22682268
GGML_API int ggml_cpu_has_vulkan (void);
2269+
GGML_API int ggml_cpu_has_kompute (void);
22692270
GGML_API int ggml_cpu_has_gpublas (void);
22702271
GGML_API int ggml_cpu_has_sse3 (void);
22712272
GGML_API int ggml_cpu_has_ssse3 (void);

llama.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
68786878
n_threads = std::min(4, n_threads);
68796879
}
68806880

6881-
const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
6882-
if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
6883-
n_threads = 1;
6884-
}
6885-
68866881
#ifdef GGML_USE_MPI
68876882
const int64_t n_layer = hparams.n_layer;
68886883
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);

0 commit comments

Comments (0)