Commit f42df4d

Merge branch 'sycl' of https://github.com/abhilash1910/llama.cpp into sycl

2 parents (b6ec810 + 947884f), commit f42df4d

123 files changed: +14439, -5615 lines

.devops/nix/package.nix

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ effectiveStdenv.mkDerivation (
     cmakeFlags =
       [
-        (cmakeBool "LLAMA_NATIVE" true)
+        (cmakeBool "LLAMA_NATIVE" false)
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" true)
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)

.github/workflows/build.yml

Lines changed: 25 additions & 0 deletions
@@ -515,6 +515,31 @@ jobs:
     - name: Build Xcode project
       run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
 
+  android-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+
+      - name: Set up JDK
+        uses: actions/setup-java@v3
+        with:
+          java-version: 17
+          distribution: zulu
+
+      - name: Setup Android SDK
+        uses: android-actions/setup-android@v3
+        with:
+          log-accepted-android-sdk-licenses: false
+
+      - name: Build
+        run: |
+          cd examples/llama.android
+
+          # Skip armeabi-v7a for now (https://github.com/llvm/llvm-project/issues/65820).
+          ./gradlew build --no-daemon -Pskip-armeabi-v7a
+
 # freeBSD-latest:
 #   runs-on: macos-12
 #   steps:
.github/workflows/nix-ci-aarch64.yml

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+name: Nix aarch64 builds
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', '**/*.sh', '**/*.py', '**/*.nix']
+
+jobs:
+  nix-build-aarch64:
+    if: ${{ vars.CACHIX_NAME != '' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Install QEMU
+        # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y qemu-user-static qemu-system-aarch64
+          sudo usermod -a -G kvm $USER
+      - name: Install Nix
+        uses: DeterminateSystems/nix-installer-action@v9
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          extra-conf: |
+            extra-platforms = aarch64-linux
+            extra-system-features = nixos-test kvm
+            extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
+            extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+      - uses: DeterminateSystems/magic-nix-cache-action@v2
+        with:
+          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
+      - name: Set-up cachix to push the results to
+        uses: cachix/cachix-action@v13
+        with:
+          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
+          name: ${{ vars.CACHIX_NAME }}
+      - name: Show all output paths
+        run: >
+          nix run github:nix-community/nix-eval-jobs
+          -- --gc-roots-dir gcroot
+          --flake
+          ".#packages.aarch64-linux"
+      - name: Build
+        run: >
+          nix run github:Mic92/nix-fast-build
+          -- --skip-cached --no-nom
+          --systems aarch64-linux
+          --flake
+          ".#checks.aarch64-linux"

.github/workflows/nix-ci.yml

Lines changed: 0 additions & 41 deletions
@@ -69,44 +69,3 @@ jobs:
           -- --skip-cached --no-nom
           --flake
           ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
-  nix-build-aarch64:
-    if: ${{ vars.CACHIX_NAME != '' }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Install QEMU
-        # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
-        run: |
-          sudo apt-get install -y qemu-user-static qemu-system-aarch64
-          sudo usermod -a -G kvm $USER
-      - name: Install Nix
-        uses: DeterminateSystems/nix-installer-action@v9
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          extra-conf: |
-            extra-platforms = aarch64-linux
-            extra-system-features = nixos-test kvm
-            extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
-            extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-      - uses: DeterminateSystems/magic-nix-cache-action@v2
-        with:
-          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-      - name: Set-up cachix to push the results to
-        uses: cachix/cachix-action@v13
-        with:
-          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-          name: ${{ vars.CACHIX_NAME }}
-      - name: Show all output paths
-        run: >
-          nix run github:nix-community/nix-eval-jobs
-          -- --gc-roots-dir gcroot
-          --flake
-          ".#packages.aarch64-linux"
-      - name: Build
-        run: >
-          nix run github:Mic92/nix-fast-build
-          -- --skip-cached --no-nom
-          --systems aarch64-linux
-          --flake
-          ".#checks.aarch64-linux"

.github/workflows/nix-flake-update.yml

Lines changed: 1 addition & 1 deletion
@@ -19,4 +19,4 @@ jobs:
           pr-labels: |
             nix
           pr-reviewers: philiptaron,SomeoneSerge
-          token: ${{ secrets.GITHUB_TOKEN }}
+          token: ${{ secrets.FLAKE_TOKEN }}

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -43,6 +43,7 @@ models-mnt
 /embedding
 /gguf
 /gguf-llama-simple
+/imatrix
 /infill
 /libllama.so
 /llama-bench
@@ -104,3 +105,4 @@ poetry.toml
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
+/tests/test-autorelease

CMakeLists.txt

Lines changed: 38 additions & 19 deletions
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.13) # for add_link_options
+cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
 project("llama.cpp" C CXX)
 include(CheckIncludeFileCXX)
 
@@ -48,6 +48,7 @@ option(BUILD_SHARED_LIBS "build shared libraries"
 option(LLAMA_STATIC "llama: static link libraries" OFF)
 option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
 
 # debug
 option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
@@ -77,6 +78,10 @@ if (NOT MSVC)
     option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
 endif()
 
+if (WIN32)
+    option(LLAMA_WIN_VER "llama: Windows Version" 0x602)
+endif()
+
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
 option(LLAMA_BLAS "llama: use BLAS" OFF)
@@ -96,7 +101,6 @@ option(LLAMA_HIP_UMA "llama: use HIP unified memory arch
 option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
 option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
 option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
-option(LLAMA_SYCL "llama: use SYCL" OFF)
 option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
 option(LLAMA_MPI "llama: use MPI" OFF)
 option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
@@ -595,6 +599,17 @@ if (LLAMA_LTO)
     endif()
 endif()
 
+if (LLAMA_CCACHE)
+    find_program(LLAMA_CCACHE_FOUND ccache)
+    if (LLAMA_CCACHE_FOUND)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+        set(ENV{CCACHE_SLOPPINESS} time_macros)
+        message(STATUS "Using ccache")
+    else()
+        message(STATUS "Warning: ccache not found - consider installing it or use LLAMA_CCACHE=OFF")
+    endif ()
+endif()
+
 # this version of Apple ld64 is buggy
 execute_process(
     COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
@@ -628,6 +643,13 @@ if (NOT MSVC)
     endif()
 endif()
 
+function(add_compile_option_cpp ARG)
+    # Adds a compile option to C/C++ only, but not for Cuda.
+    # Use, e.g., for CPU-architecture flags.
+    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
+    add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
+endfunction()
+
 if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
     message(STATUS "ARM detected")
     if (MSVC)
@@ -662,8 +684,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
            include(cmake/FindSIMD.cmake)
        endif ()
        if (LLAMA_AVX512)
-           add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
-           add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
+           add_compile_option_cpp(/arch:AVX512)
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
@@ -677,37 +698,35 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
            add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
        endif()
    elseif (LLAMA_AVX2)
-       add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
-       add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
+       add_compile_option_cpp(/arch:AVX2)
    elseif (LLAMA_AVX)
-       add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
-       add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
+       add_compile_option_cpp(/arch:AVX)
    endif()
 else()
    if (LLAMA_NATIVE)
-       add_compile_options(-march=native)
+       add_compile_option_cpp(-march=native)
    endif()
    if (LLAMA_F16C)
-       add_compile_options(-mf16c)
+       add_compile_option_cpp(-mf16c)
    endif()
    if (LLAMA_FMA)
-       add_compile_options(-mfma)
+       add_compile_option_cpp(-mfma)
    endif()
    if (LLAMA_AVX)
-       add_compile_options(-mavx)
+       add_compile_option_cpp(-mavx)
    endif()
    if (LLAMA_AVX2)
-       add_compile_options(-mavx2)
+       add_compile_option_cpp(-mavx2)
    endif()
    if (LLAMA_AVX512)
-       add_compile_options(-mavx512f)
-       add_compile_options(-mavx512bw)
+       add_compile_option_cpp(-mavx512f)
+       add_compile_option_cpp(-mavx512bw)
    endif()
    if (LLAMA_AVX512_VBMI)
-       add_compile_options(-mavx512vbmi)
+       add_compile_option_cpp(-mavx512vbmi)
    endif()
    if (LLAMA_AVX512_VNNI)
-       add_compile_options(-mavx512vnni)
+       add_compile_option_cpp(-mavx512vnni)
    endif()
 endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
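Two related changes above are worth unpacking. The new add_compile_option_cpp helper applies a flag only to C and C++ compile steps via COMPILE_LANGUAGE generator expressions, keeping host-CPU flags such as -march=native away from the CUDA compiler. And on MSVC, /arch:AVX512 enables the instructions but does not define the finer-grained ISA macros, which is why the build defines __AVX512F__, __AVX512VBMI__, and __AVX512VNNI__ itself. The sketch below (illustrative only, not code from this repository) shows the macro-guarded dispatch those manual definitions keep working:

    // isa_paths.cpp: a sketch of macro-guarded dispatch. GCC/Clang define
    // these macros when the corresponding -m flags are set; MSVC does not,
    // so the CMake script above injects them alongside /arch:AVX512.
    #include <cstdio>

    int main() {
    #if defined(__AVX512VNNI__)
        std::puts("using the AVX512-VNNI kernel");
    #elif defined(__AVX512F__)
        std::puts("using the baseline AVX512 kernel");
    #else
        std::puts("using the scalar fallback kernel");
    #endif
        return 0;
    }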
@@ -724,7 +743,7 @@ endif()
 
 if (MINGW)
     # Target Windows 8 for PrefetchVirtualMemory
-    add_compile_definitions(_WIN32_WINNT=0x602)
+    add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
 endif()
 
 #
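The MinGW Windows target version becomes configurable through LLAMA_WIN_VER instead of being hard-coded. The 0x602 default exists because PrefetchVirtualMemory was introduced in Windows 8, and the Windows headers only declare it when _WIN32_WINNT is at least 0x0602. A Windows-only C++ sketch of that pattern (assumed usage, not repository code):

    // prefetch.cpp: PrefetchVirtualMemory is declared only when
    // _WIN32_WINNT >= 0x0602 (Windows 8), so builds targeting older
    // Windows must compile a no-op fallback instead.
    #include <cstddef>
    #ifdef _WIN32
    #include <windows.h>

    static void prefetch_region(void * addr, size_t len) {
    #if _WIN32_WINNT >= 0x0602
        WIN32_MEMORY_RANGE_ENTRY range;
        range.VirtualAddress = addr;
        range.NumberOfBytes  = len;
        PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0);
    #else
        (void) addr; (void) len; // no prefetch hint available before Windows 8
    #endif
    }
    #endif

    int main() {
    #ifdef _WIN32
        static char buf[4096];
        prefetch_region(buf, sizeof(buf));
    #endif
        return 0;
    }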
@@ -877,7 +896,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
     ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
     DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
 
-set(GGML_PUBLIC_HEADERS "ggml.h"
+set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
     "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
     "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")

Makefile

Lines changed: 8 additions & 11 deletions
@@ -1,6 +1,6 @@
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \
-	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
 	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o
 
@@ -9,7 +9,7 @@ TEST_TARGETS = \
 	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
 	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
 	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
-	tests/test-backend-ops
+	tests/test-backend-ops tests/test-autorelease
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -43,10 +43,6 @@ ifeq ($(UNAME_S),Darwin)
 	endif
 endif
 
-ifneq '' '$(or $(filter clean,$(MAKECMDGOALS)),$(LLAMA_METAL))'
-BUILD_TARGETS += metal
-endif
-
 default: $(BUILD_TARGETS)
 
 test: $(TEST_TARGETS)
@@ -614,6 +610,9 @@ quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.
 perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
@@ -668,11 +667,6 @@ lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-ifdef LLAMA_METAL
-metal: examples/metal/metal.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-endif
-
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift
 	(cd examples/batched.swift; make build)
@@ -753,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h
 
 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
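Two new targets land here: imatrix, the importance-matrix tool used to guide quantization, and tests/test-autorelease. The test's name points at the macOS failure mode where llama.cpp is driven from a thread that starts without an Objective-C autorelease pool. A hypothetical sketch of such a test follows; the llama.h calls reflect the API of this era and are assumptions, not the repository's actual test:

    // test-autorelease sketch: run the whole model lifecycle on a
    // secondary std::thread, which on macOS begins with no autorelease
    // pool; the scenario this test presumably guards against leaking in.
    #include <thread>
    #include "llama.h"

    int main(int argc, char ** argv) {
        if (argc < 2) {
            return 1; // usage: test-autorelease <model.gguf>
        }
        std::thread worker([&]() {
            llama_backend_init(false /* numa */);
            llama_model_params mparams = llama_model_default_params();
            llama_model * model = llama_load_model_from_file(argv[1], mparams);
            llama_free_model(model);
            llama_backend_free();
        });
        worker.join();
        return 0;
    }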

Package.swift

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ let package = Package(
         .library(name: "llama", targets: ["llama"]),
     ],
     dependencies: [
-        .package(url: "https://github.com/ggerganov/ggml.git", .branch("master"))
+        .package(url: "https://github.com/ggerganov/ggml.git", .branch("release"))
     ],
     targets: [
         .target(

README.md

Lines changed: 1 addition & 0 deletions
@@ -128,6 +128,7 @@ as the main playground for developing new features for the [ggml](https://github
 - React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn)
 - Java: [kherud/java-llama.cpp](https://github.com/kherud/java-llama.cpp)
 - Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig)
+- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart)
 
 **UI:**
awq-py/README.md

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ Example for llama model
 # For llama7b and llama2 models
 python convert.py models/llama-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/llama_7b_fp16.gguf
 # For mistral and mpt models
-python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/mpt_7b_fp16.gguf
+python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/mpt-7b-w4-g128.pt --outfile models/mpt_7b_fp16.gguf
 ```
 
 ## Quantize
