Skip to content

Commit f9a1ece

Browse files
committed
Restore CLBlast back-end
Partially reverts "ggml: remove OpenCL (ggml-org#7735)". Restores functionality; skips documentation.
1 parent 7a16ce7 commit f9a1ece

13 files changed

+2473
-16
lines changed

CMakeLists.txt

Lines changed: 20 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -111,6 +111,7 @@ option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for Flas
111111
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
112112
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
113113
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
114+
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
114115
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
115116
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
116117
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
@@ -503,6 +504,22 @@ if (LLAMA_RPC)
503504
set(GGML_SOURCES_RPC ggml-rpc.cpp)
504505
endif()
505506

507+
if (LLAMA_CLBLAST)
508+
find_package(CLBlast)
509+
if (CLBlast_FOUND)
510+
message(STATUS "CLBlast found")
511+
512+
set(GGML_HEADERS_OPENCL ggml-opencl.h)
513+
set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
514+
515+
add_compile_definitions(GGML_USE_CLBLAST)
516+
517+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
518+
else()
519+
message(WARNING "CLBlast not found")
520+
endif()
521+
endif()
522+
506523
if (LLAMA_VULKAN)
507524
find_package(Vulkan)
508525
if (Vulkan_FOUND)
@@ -1252,6 +1269,7 @@ add_library(ggml OBJECT
12521269
ggml-quants.c
12531270
ggml-quants.h
12541271
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1272+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
12551273
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
12561274
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
12571275
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
@@ -1339,9 +1357,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
13391357
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
13401358

13411359
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
1342-
"${GGML_HEADERS_CUDA}"
1343-
"${GGML_HEADERS_METAL}"
1344-
"${GGML_HEADERS_EXTRA}")
1360+
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1361+
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
13451362

13461363
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
13471364
install(TARGETS ggml PUBLIC_HEADER)

Makefile

Lines changed: 17 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -548,6 +548,23 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h
548548
$(NVCC_COMPILE)
549549
endif # LLAMA_CUDA
550550

551+
ifdef LLAMA_CLBLAST
552+
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
553+
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
554+
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
555+
556+
# Mac provides OpenCL as a framework
557+
ifeq ($(UNAME_S),Darwin)
558+
MK_LDFLAGS += -lclblast -framework OpenCL
559+
else
560+
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
561+
endif
562+
OBJS += ggml-opencl.o
563+
564+
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
565+
$(CXX) $(CXXFLAGS) -c $< -o $@
566+
endif # LLAMA_CLBLAST
567+
551568
ifdef LLAMA_VULKAN
552569
MK_CPPFLAGS += -DGGML_USE_VULKAN
553570
MK_LDFLAGS += -lvulkan

common/common.cpp

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -3242,6 +3242,7 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
32423242
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
32433243
fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
32443244
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
3245+
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
32453246
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
32463247
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
32473248
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");

examples/llama-bench/llama-bench.cpp

Lines changed: 8 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -709,6 +709,7 @@ struct test {
709709
static const std::string build_commit;
710710
static const int build_number;
711711
static const bool cuda;
712+
static const bool opencl;
712713
static const bool vulkan;
713714
static const bool kompute;
714715
static const bool metal;
@@ -797,6 +798,9 @@ struct test {
797798
if (cuda) {
798799
return GGML_CUDA_NAME;
799800
}
801+
if (opencl) {
802+
return "OpenCL";
803+
}
800804
if (vulkan) {
801805
return "Vulkan";
802806
}
@@ -825,7 +829,7 @@ struct test {
825829
static const std::vector<std::string> & get_fields() {
826830
static const std::vector<std::string> fields = {
827831
"build_commit", "build_number",
828-
"cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", "blas",
832+
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", "blas",
829833
"cpu_info", "gpu_info",
830834
"model_filename", "model_type", "model_size", "model_n_params",
831835
"n_batch", "n_ubatch",
@@ -851,7 +855,7 @@ struct test {
851855
field == "avg_ns" || field == "stddev_ns") {
852856
return INT;
853857
}
854-
if (field == "cuda" || field == "vulkan" || field == "kompute" || field == "metal" ||
858+
if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
855859
field == "gpu_blas" || field == "blas" || field == "sycl" ||field == "f16_kv" || field == "no_kv_offload" ||
856860
field == "flash_attn" || field == "use_mmap" || field == "embeddings") {
857861
return BOOL;
@@ -880,7 +884,7 @@ struct test {
880884
}
881885
std::vector<std::string> values = {
882886
build_commit, std::to_string(build_number),
883-
std::to_string(cuda), std::to_string(vulkan), std::to_string(vulkan),
887+
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(vulkan),
884888
std::to_string(metal), std::to_string(sycl), std::to_string(rpc), std::to_string(gpu_blas), std::to_string(blas),
885889
cpu_info, gpu_info,
886890
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
@@ -909,6 +913,7 @@ struct test {
909913
const std::string test::build_commit = LLAMA_COMMIT;
910914
const int test::build_number = LLAMA_BUILD_NUMBER;
911915
const bool test::cuda = !!ggml_cpu_has_cuda();
916+
const bool test::opencl = !!ggml_cpu_has_clblast();
912917
const bool test::vulkan = !!ggml_cpu_has_vulkan();
913918
const bool test::kompute = !!ggml_cpu_has_kompute();
914919
const bool test::metal = !!ggml_cpu_has_metal();

flake.nix

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -159,6 +159,7 @@
159159
windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
160160
}
161161
// lib.optionalAttrs pkgs.stdenv.isLinux {
162+
opencl = config.packages.default.override { useOpenCL = true; };
162163
cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
163164

164165
mpi-cpu = config.packages.default.override { useMpi = true; };

0 commit comments

Comments (0)