
Commit 702e199

Merge branch 'master' into compilade/batch-splits
2 parents: 0596a99 + 5fd89a7

134 files changed: +6749 / -2088 lines


.github/workflows/bench.yml

Lines changed: 3 additions & 1 deletion
@@ -129,6 +129,8 @@ jobs:
 
       - name: Server bench
         id: server_bench
+        env:
+          HEAD_REF: ${{ github.head_ref || github.ref_name }}
         run: |
           set -eux
 
@@ -137,7 +139,7 @@
           python bench.py \
               --runner-label ${{ env.RUNNER_LABEL }} \
               --name ${{ github.job }} \
-              --branch ${{ github.head_ref || github.ref_name }} \
+              --branch $HEAD_REF \
               --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
               --scenario script.js \
               --duration ${{ github.event.inputs.duration || env.DURATION }} \
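Context for this hunk: `${{ ... }}` expressions are expanded by the workflow templater before the shell ever runs, so interpolating an attacker-controllable value such as a PR branch name directly inside `run:` can inject shell code. Routing it through `env:` turns it into ordinary environment data instead. A minimal self-contained sketch of the pattern, with job and step names that are illustrative rather than taken from this commit:

# Sketch: pass untrusted expressions through `env:` instead of
# splicing them into the script body (names below are illustrative).
on: pull_request

jobs:
  example:
    runs-on: ubuntu-latest
    steps:
      - name: Use a branch name safely
        env:
          HEAD_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          # $HEAD_REF is expanded by the shell as plain data,
          # not templated into the script source by ${{ ... }}
          echo "benchmarking branch: $HEAD_REF"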

.github/workflows/build.yml

Lines changed: 10 additions & 12 deletions
@@ -47,7 +47,7 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
+          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -105,7 +105,7 @@ jobs:
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -222,7 +222,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(nproc)
 
       - name: Test
@@ -696,22 +696,20 @@ jobs:
     strategy:
       matrix:
         include:
-          - build: 'rpc-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'llvm-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'msvc-arm64'
.github/workflows/python-check-requirements.yml

Lines changed: 2 additions & 4 deletions
@@ -6,15 +6,13 @@ on:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
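
The two fixed entries collapse into one recursive glob: in GitHub Actions path filters `**` spans directories, so the single pattern covers the old top-level and `requirements/` cases as well as nested copies. A small sketch of what the consolidated filter matches; the example paths are illustrative, not an exhaustive list from the repo:

on:
  push:
    paths:
      - '**/requirements*.txt'
      # matches: requirements.txt                       (repository root)
      # matches: requirements/requirements-convert.txt  (old requirements/*.txt case)
      # matches: examples/server/bench/requirements.txt (nested, newly covered)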

.gitignore

Lines changed: 0 additions & 1 deletion
@@ -79,7 +79,6 @@ models-mnt
 !models/ggml-vocab-*.gguf*
 
 # Zig
-
 zig-out/
 zig-cache/
 

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 - Execute [the full CI locally on your machine](ci/README.md) before publishing
 - Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
 - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
+- Consider allowing write access to your branch for faster review
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
 
 # Pull requests (for collaborators)

Makefile

Lines changed: 27 additions & 15 deletions
@@ -19,6 +19,7 @@ BUILD_TARGETS = \
 	llama-imatrix \
 	llama-infill \
 	llama-llava-cli \
+	llama-minicpmv-cli\
 	llama-lookahead \
 	llama-lookup \
 	llama-lookup-create \
@@ -762,6 +763,10 @@ ifdef GGML_VULKAN_MEMORY_DEBUG
 	MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
 endif
 
+ifdef GGML_VULKAN_PERF
+	MK_CPPFLAGS += -DGGML_VULKAN_PERF
+endif
+
 ifdef GGML_VULKAN_VALIDATE
 	MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
 endif
@@ -888,15 +893,16 @@ ggml/src/ggml-metal-embed.o: \
 	ggml/src/ggml-common.h
 	@echo "Embedding Metal library"
 	@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
-	$(eval TEMP_ASSEMBLY=$(shell mktemp))
-	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
-	@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
-	@$(AS) $(TEMP_ASSEMBLY) -o $@
-	@rm -f ${TEMP_ASSEMBLY}
+	$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
+	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
+	@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
+	@rmdir ${TEMP_ASSEMBLY}
 endif
 endif # GGML_METAL
 
@@ -1205,6 +1211,7 @@ clean:
 	rm -rvf ggml/*.dll
 	rm -rvf ggml/*.so
 	rm -vrf ggml/src/*.o
+	rm -rvf ggml/src/llamafile/*.o
 	rm -rvf common/build-info.cpp
 	rm -vrf ggml/src/ggml-metal-embed.metal
 	rm -vrf ggml/src/ggml-cuda/*.o
@@ -1451,15 +1458,20 @@ libllava.a: examples/llava/llava.cpp \
 	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
 
 llama-llava-cli: examples/llava/llava-cli.cpp \
-	examples/llava/clip.h \
-	examples/llava/clip.cpp \
+	examples/llava/llava.cpp \
 	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
+
+llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
 	examples/llava/llava.cpp \
+	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
 	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift

README.md

Lines changed: 11 additions & 0 deletions
@@ -95,8 +95,16 @@ Typically finetunes of the base models below are supported as well.
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)
+- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
 - [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
+- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)
+- [x] [Smaug](https://huggingface.co/models?search=Smaug)
+- [x] [Poro 34B](https://huggingface.co/LumiOpen/Poro-34B)
+- [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM)
+- [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
+- [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b)
+- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 
 (instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))
 
@@ -145,6 +153,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [Faraday](https://faraday.dev/) (proprietary)
 - [LMStudio](https://lmstudio.ai/) (proprietary)
 - [Layla](https://play.google.com/store/apps/details?id=com.laylalite) (proprietary)
+- [ramalama](https://github.com/containers/ramalama) (MIT)
 - [LocalAI](https://github.com/mudler/LocalAI) (MIT)
 - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL)
 - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile)
@@ -177,10 +186,12 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 
 - [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
 - [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
+- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage
 
 **Infrastructure:**
 
 - [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
+- [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs
 
 **Games:**
 - [Lucy's Labyrinth](https://github.com/MorganRO8/Lucys_Labyrinth) - A simple maze game where agents controlled by an AI model will try to trick you.
