Skip to content

Commit 3df0b8d

Browse files
committed
Merge branch 'gg/flash-attn' of https://github.com/ggerganov/llama.cpp into flash-attn-cuda
2 parents 0afe47f + 8ad92dc commit 3df0b8d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+8084
-1980
lines changed

.ecrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"Exclude": ["^\\.gitmodules$"],
23
"Disable": {
34
"IndentSize": true
45
}

.github/workflows/build.yml

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ jobs:
337337
OPENCL_VERSION: 2023.04.17
338338
CLBLAST_VERSION: 1.6.0
339339
SDE_VERSION: 9.33.0-2024-01-07
340+
VULKAN_VERSION: 1.3.261.1
340341

341342
strategy:
342343
matrix:
@@ -353,6 +354,8 @@ jobs:
353354
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
354355
- build: 'openblas'
355356
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
357+
- build: 'kompute'
358+
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
356359

357360
steps:
358361
- name: Clone
@@ -361,6 +364,12 @@ jobs:
361364
with:
362365
fetch-depth: 0
363366

367+
- name: Clone Kompute submodule
368+
id: clone_kompute
369+
if: ${{ matrix.build == 'kompute' }}
370+
run: |
371+
git submodule update --init kompute
372+
364373
- name: Download OpenCL SDK
365374
id: get_opencl
366375
if: ${{ matrix.build == 'clblast' }}
@@ -395,6 +404,15 @@ jobs:
395404
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
396405
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
397406
407+
- name: Install Vulkan SDK
408+
id: get_vulkan
409+
if: ${{ matrix.build == 'kompute' }}
410+
run: |
411+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
412+
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
413+
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
414+
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
415+
398416
- name: Build
399417
id: cmake_build
400418
run: |
@@ -432,7 +450,8 @@ jobs:
432450
433451
- name: Test
434452
id: cmake_test
435-
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
453+
# not all machines have native AVX-512
454+
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
436455
run: |
437456
cd build
438457
ctest -L main -C Release --verbose --timeout 900
@@ -546,6 +565,31 @@ jobs:
546565
path: |
547566
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
548567
568+
windows-latest-cmake-sycl:
569+
runs-on: windows-latest
570+
defaults:
571+
run:
572+
shell: bash
573+
574+
env:
575+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/62641e01-1e8d-4ace-91d6-ae03f7f8a71f/w_BaseKit_p_2024.0.0.49563_offline.exe
576+
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
577+
578+
579+
steps:
580+
- name: Clone
581+
id: checkout
582+
uses: actions/checkout@v3
583+
with:
584+
fetch-depth: 0
585+
586+
- name: Install
587+
run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
588+
589+
- name: Build
590+
id: cmake_build
591+
run: examples/sycl/win-build-sycl.bat
592+
549593
ios-xcode-build:
550594
runs-on: macos-latest
551595

.github/workflows/editorconfig.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
name: EditorConfig Checker
22

33
on:
4+
workflow_dispatch: # allows manual triggering
5+
inputs:
6+
create_release:
7+
description: 'Create new release'
8+
required: true
9+
type: boolean
410
push:
511
branches:
612
- master

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,4 @@ examples/jeopardy/results.txt
8989

9090
poetry.lock
9191
poetry.toml
92+
nppBackup

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "kompute"]
2+
path = kompute
3+
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

Lines changed: 175 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ option(LLAMA_VULKAN "llama: use Vulkan"
103103
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
104104
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
105105
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
106+
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
106107
option(LLAMA_MPI "llama: use MPI" OFF)
107108
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
108109
option(LLAMA_SYCL "llama: use SYCL" OFF)
@@ -422,7 +423,13 @@ if (LLAMA_VULKAN)
422423
if (Vulkan_FOUND)
423424
message(STATUS "Vulkan found")
424425

426+
set(GGML_HEADERS_VULKAN ggml-vulkan.h)
427+
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
428+
425429
add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h)
430+
if (BUILD_SHARED_LIBS)
431+
set_target_properties(ggml-vulkan PROPERTIES POSITION_INDEPENDENT_CODE ON)
432+
endif()
426433
target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
427434

428435
add_compile_definitions(GGML_USE_VULKAN)
@@ -478,7 +485,6 @@ if (LLAMA_HIPBLAS)
478485
endif()
479486
endif()
480487

481-
482488
if (LLAMA_SYCL)
483489
if ( NOT DEFINED ENV{ONEAPI_ROOT})
484490
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
@@ -501,7 +507,165 @@ if (LLAMA_SYCL)
501507
set(GGML_HEADERS_SYCL ggml.h ggml-sycl.h)
502508
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
503509

504-
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
510+
if (WIN32)
511+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
512+
else()
513+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
514+
endif()
515+
endif()
516+
517+
if (LLAMA_KOMPUTE)
518+
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
519+
find_package(Vulkan COMPONENTS glslc REQUIRED)
520+
# HINTS takes directories to search, not an imported target name, so point it
# at the Vulkan SDK's binary directories (the default PATH search still applies).
find_program(glslc_executable NAMES glslc HINTS "$ENV{VULKAN_SDK}/Bin" "$ENV{VULKAN_SDK}/bin")
521+
if (NOT glslc_executable)
522+
message(FATAL_ERROR "glslc not found")
523+
endif()
524+
525+
function(compile_shader)
526+
set(options)
527+
set(oneValueArgs)
528+
set(multiValueArgs SOURCES)
529+
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
530+
foreach(source ${compile_shader_SOURCES})
531+
get_filename_component(filename ${source} NAME)
532+
set(spv_file ${filename}.spv)
533+
add_custom_command(
534+
OUTPUT ${spv_file}
535+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
536+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
537+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
538+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
539+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
540+
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
541+
COMMENT "Compiling ${source} to ${spv_file}"
542+
)
543+
544+
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
545+
set(FILE_NAME "shader${RAW_FILE_NAME}")
546+
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
547+
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
548+
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
549+
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
550+
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
551+
if(CMAKE_GENERATOR MATCHES "Visual Studio")
552+
add_custom_command(
553+
OUTPUT ${OUTPUT_HEADER_FILE}
554+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
555+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
556+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
557+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
558+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
559+
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
560+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
561+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
562+
DEPENDS ${spv_file} xxd
563+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
564+
)
565+
else()
566+
add_custom_command(
567+
OUTPUT ${OUTPUT_HEADER_FILE}
568+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
569+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
570+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
571+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
572+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
573+
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
574+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
575+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
576+
DEPENDS ${spv_file} xxd
577+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
578+
)
579+
endif()
580+
endforeach()
581+
endfunction()
582+
583+
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
584+
message(STATUS "Kompute found")
585+
set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
586+
add_subdirectory(kompute)
587+
588+
# Compile our shaders
589+
compile_shader(SOURCES
590+
kompute-shaders/op_scale.comp
591+
kompute-shaders/op_scale_8.comp
592+
kompute-shaders/op_add.comp
593+
kompute-shaders/op_addrow.comp
594+
kompute-shaders/op_mul.comp
595+
kompute-shaders/op_silu.comp
596+
kompute-shaders/op_relu.comp
597+
kompute-shaders/op_gelu.comp
598+
kompute-shaders/op_softmax.comp
599+
kompute-shaders/op_norm.comp
600+
kompute-shaders/op_rmsnorm.comp
601+
kompute-shaders/op_diagmask.comp
602+
kompute-shaders/op_mul_mat_mat_f32.comp
603+
kompute-shaders/op_mul_mat_f16.comp
604+
kompute-shaders/op_mul_mat_q8_0.comp
605+
kompute-shaders/op_mul_mat_q4_0.comp
606+
kompute-shaders/op_mul_mat_q4_1.comp
607+
kompute-shaders/op_mul_mat_q6_k.comp
608+
kompute-shaders/op_getrows_f16.comp
609+
kompute-shaders/op_getrows_q4_0.comp
610+
kompute-shaders/op_getrows_q4_1.comp
611+
kompute-shaders/op_getrows_q6_k.comp
612+
kompute-shaders/op_rope_f16.comp
613+
kompute-shaders/op_rope_f32.comp
614+
kompute-shaders/op_cpy_f16_f16.comp
615+
kompute-shaders/op_cpy_f16_f32.comp
616+
kompute-shaders/op_cpy_f32_f16.comp
617+
kompute-shaders/op_cpy_f32_f32.comp
618+
)
619+
620+
# Create a custom target for our generated shaders
621+
add_custom_target(generated_shaders DEPENDS
622+
shaderop_scale.h
623+
shaderop_scale_8.h
624+
shaderop_add.h
625+
shaderop_addrow.h
626+
shaderop_mul.h
627+
shaderop_silu.h
628+
shaderop_relu.h
629+
shaderop_gelu.h
630+
shaderop_softmax.h
631+
shaderop_norm.h
632+
shaderop_rmsnorm.h
633+
shaderop_diagmask.h
634+
shaderop_mul_mat_mat_f32.h
635+
shaderop_mul_mat_f16.h
636+
shaderop_mul_mat_q8_0.h
637+
shaderop_mul_mat_q4_0.h
638+
shaderop_mul_mat_q4_1.h
639+
shaderop_mul_mat_q6_k.h
640+
shaderop_getrows_f16.h
641+
shaderop_getrows_q4_0.h
642+
shaderop_getrows_q4_1.h
643+
shaderop_getrows_q6_k.h
644+
shaderop_rope_f16.h
645+
shaderop_rope_f32.h
646+
shaderop_cpy_f16_f16.h
647+
shaderop_cpy_f16_f32.h
648+
shaderop_cpy_f32_f16.h
649+
shaderop_cpy_f32_f32.h
650+
)
651+
652+
# Create a custom command that depends on the generated_shaders
653+
add_custom_command(
654+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
655+
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
656+
DEPENDS generated_shaders
657+
COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
658+
)
659+
660+
# Add the stamp to the main sources to ensure dependency tracking
661+
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
662+
# Only real headers belong here; the stamp file is already listed in
# GGML_SOURCES_KOMPUTE, which is what orders shader generation before compilation.
set(GGML_HEADERS_KOMPUTE ggml-kompute.h)
663+
add_compile_definitions(GGML_USE_KOMPUTE)
664+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
665+
# The generated shaderop_*.h headers use relative OUTPUT paths, which CMake
# places in the current binary directory; that equals CMAKE_BINARY_DIR only
# when this project is the top-level build.
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
666+
else()
667+
message(WARNING "Kompute not found")
668+
endif()
505669
endif()
506670

507671
function(get_flags CCID CCVER)
@@ -846,12 +1010,14 @@ add_library(ggml OBJECT
8461010
ggml-backend.h
8471011
ggml-quants.c
8481012
ggml-quants.h
849-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
850-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
851-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
852-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
853-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
854-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1013+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1014+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1015+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1016+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1017+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1018+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1019+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1020+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
8551021
)
8561022

8571023
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
@@ -928,7 +1094,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
9281094
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
9291095

9301096
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
931-
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1097+
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}" "${GGML_HEADERS_VULKAN}"
9321098
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
9331099

9341100
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

0 commit comments

Comments
 (0)