
Commit 9553e52

Merge remote-tracking branch 'upstream/concedo'
2 parents: cac6650 + f036109

Note: this is a large commit, so only a subset of the changed files is shown below.

54 files changed: 1,988 additions and 626 deletions

.gitignore

Lines changed: 7 additions & 1 deletion
@@ -16,6 +16,8 @@ build/
 build-em/
 build-debug/
 build-release/
+build-ci-debug/
+build-ci-release/
 build-static/
 build-cublas/
 build-opencl/
@@ -25,6 +27,10 @@ build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/
 out/
+tmp/
+
+models/*
+models-mnt
 
 /main
 /quantize
@@ -70,4 +76,4 @@ koboldcpp_openblas_noavx2.dll
 koboldcpp_clblast.dll
 koboldcpp_cublas.dll
 cublas64_11.dll
-cublasLt64_11.dll
+cublasLt64_11.dll

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@ if (LLAMA_CUBLAS)
 
     if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         if (LLAMA_CUDA_DMMV_F16)
-            set(CMAKE_CUDA_ARCHITECTURES "61") # needed for f16 CUDA intrinsics
+            set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics
         else()
             set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
         endif()
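
A minimal sketch of how these defaults might come into play at configure time; `LLAMA_CUBLAS` and `LLAMA_CUDA_DMMV_F16` come from the diff above, while the build directory and the explicit override are illustrative assumptions:

```bash
# Sketch only: configure a CuBLAS build and let the defaults above pick the CUDA architectures.
mkdir -p build && cd build

# With f16 intrinsics enabled, CMAKE_CUDA_ARCHITECTURES now defaults to "60;61":
cmake -DLLAMA_CUBLAS=1 -DLLAMA_CUDA_DMMV_F16=1 ..

# Either default only applies when the variable is NOT already defined, so it can be bypassed explicitly:
cmake -DLLAMA_CUBLAS=1 -DCMAKE_CUDA_ARCHITECTURES="52;61;70" ..

cmake --build . --config Release
```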

Makefile

Lines changed: 2 additions & 2 deletions
@@ -144,7 +144,7 @@ ifdef LLAMA_CUBLAS
 	CUBLASLD_FLAGS = -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	NVCC = nvcc
-	NVCCFLAGS = --forward-unknown-to-host-compiler
+	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
 ifdef CUDA_DOCKER_ARCH
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
 else
@@ -401,7 +401,7 @@ koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common
 	$(OPENBLAS_BUILD)
 koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o $(OBJS)
 	$(FAILSAFE_BUILD)
-koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter.o k_quants_noavx2.o $(OBJS)
+koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o $(OBJS)
 	$(OPENBLAS_NOAVX2_BUILD)
 koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS)
 	$(CLBLAST_BUILD)
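
On the Makefile side, a hedged sketch of a build that picks up the new `-use_fast_math` flag; the `koboldcpp_cublas` target name is inferred from `koboldcpp_cublas.dll` elsewhere in this commit and should be treated as an assumption:

```bash
# Sketch: with LLAMA_CUBLAS=1 the nvcc flags include --forward-unknown-to-host-compiler -use_fast_math.
make LLAMA_CUBLAS=1 koboldcpp_cublas

# Inside a CUDA docker image the target architecture can be pinned (the value here is only an example):
make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=sm_80 koboldcpp_cublas
```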

README.md

Lines changed: 7 additions & 3 deletions
@@ -32,12 +32,14 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 ## Usage
 - **[Download the latest .exe release here](https://github.com/LostRuins/koboldcpp/releases/latest)** or clone the git repo.
 - Windows binaries are provided in the form of **koboldcpp.exe**, which is a pyinstaller wrapper for a few **.dll** files and **koboldcpp.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
-- Weights are not included, you can use the official llama.cpp `quantize.exe` to generate them from your official weight files (or download them from other places).
+- Weights are not included; you can use the official llama.cpp `quantize.exe` to generate them from your official weight files (or download them from other places such as [TheBloke's Huggingface](https://huggingface.co/TheBloke)).
 - To run, execute **koboldcpp.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite. If you're not on Windows, then run the script **KoboldCpp.py** after compiling the libraries.
+- Launching with no command line arguments displays a GUI containing a subset of configurable settings. Generally you don't have to change much besides the `Presets` and `GPU Layers`. Read the `--help` for more info about each setting.
 - By default, you can connect to http://localhost:5001
 - You can also run it using the command line `koboldcpp.exe [ggml_model.bin] [port]`. For info, please check `koboldcpp.exe --help`
-- Big context still too slow? Try the `--smartcontext` flag to reduce prompt processing frequency. Also, you can try to run with your GPU using CLBlast, with `--useclblast` flag for a speedup
-- Want even more speedup? Combine `--useclblast` with `--gpulayers` to offload entire layers to the GPU! **Much faster, but uses more VRAM**. Experiment to determine number of layers to offload.
+- Default context size too small? Try `--contextsize 3072` to 1.5x your context size without much perplexity gain. Note that you'll have to increase the max context in the Kobold Lite UI as well (click and edit the number text field).
+- Big context too slow? Try the `--smartcontext` flag to reduce prompt processing frequency. Also, you can try to run with your GPU using CLBlast, with the `--useclblast` flag for a speedup.
+- Want even more speedup? Combine `--useclblast` with `--gpulayers` to offload entire layers to the GPU! **Much faster, but uses more VRAM**. Experiment to determine the number of layers to offload, and reduce by a few if you run out of memory.
 - If you are having crashes or issues, you can try turning off BLAS with the `--noblas` flag. You can also try running in a non-avx2 compatibility mode with `--noavx2`. Lastly, you can try turning off mmap with `--nommap`.
 
 For more information, be sure to run the program with the `--help` flag.
@@ -71,12 +73,14 @@ For more information, be sure to run the program with the `--help` flag.
 - See https://github.com/ggerganov/llama.cpp/pull/1828/files
 
 ## CuBLAS?
+- If you're on Windows with an Nvidia GPU, you can get CUDA support out of the box using the `--usecublas` flag; make sure you select the correct .exe with CUDA support.
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1` or using the provided CMake file (best for visual studio users). If you use the CMake file to build, copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC. Note that support for CuBLAS is limited.
 
 ## Considerations
 - For Windows: No installation, single file executable, (It Just Works)
 - Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
 - Since v1.15, requires CLBlast if enabled, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without CLBlast.
+- Since v1.33, you can set the context size to be above what the model supports officially. It does increase perplexity but should still work well below 4096 even on untuned models (for GPT-NeoX, GPT-J, and LLAMA models). Customize this with `--ropeconfig`.
 - **I plan to keep backwards compatibility with ALL past llama.cpp AND alpaca.cpp models**. But you are also encouraged to reconvert/update your models if possible for best results.
 
 ## License
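
A hedged example pulling the flags above into one launch command; the model filename, port, GPU layer count, and CLBlast platform/device indices are placeholders:

```bash
# Sketch only: CLBlast offload, larger context, and smartcontext in one invocation.
koboldcpp.exe ggml_model.bin 5001 --useclblast 0 0 --gpulayers 24 --contextsize 3072 --smartcontext

# Or, with the CUDA-enabled executable described in the CuBLAS section:
koboldcpp.exe ggml_model.bin 5001 --usecublas --gpulayers 24
```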

ci/README.md

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
# CI

In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions), `llama.cpp` uses a custom CI framework:

https://github.com/ggml-org/ci

It monitors the `master` branch for new commits and runs the
[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
to execute heavier workloads compared to just using Github Actions. Also, with time, the cloud instances will be scaled
to cover various hardware architectures, including GPU and Apple Silicon instances.

Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
Only the branches of this repo are monitored for this keyword.

It is good practice, before publishing changes, to execute the full CI locally on your machine:

```bash
mkdir tmp
bash ./ci/run.sh ./tmp/results ./tmp/mnt
```
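
A hedged illustration of the trigger described above; the commit message text is made up, and only the `ggml-ci` keyword (on a branch of this repo) matters:

```bash
# Sketch: request a CI run by including the keyword in the commit message.
git commit -m "ggml : fix quantization edge case (ggml-ci)"
git push origin my-feature-branch
```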

ci/run.sh

Lines changed: 262 additions & 0 deletions
@@ -0,0 +1,262 @@
#/bin/bash

if [ -z "$2" ]; then
    echo "usage: $0 <output-dir> <mnt-dir>"
    exit 1
fi

mkdir -p "$1"
mkdir -p "$2"

OUT=$(realpath "$1")
MNT=$(realpath "$2")

rm -v $OUT/*.log
rm -v $OUT/*.exit
rm -v $OUT/*.md

sd=`dirname $0`
cd $sd/../
SRC=`pwd`

## helpers

# download a file if it does not exist or if it is outdated
function gg_wget {
    local out=$1
    local url=$2

    local cwd=`pwd`

    mkdir -p $out
    cd $out

    # should not re-download if file is the same
    wget -nv -N $url

    cd $cwd
}

function gg_printf {
    printf -- "$@" >> $OUT/README.md
}

function gg_run {
    ci=$1

    set -o pipefail
    set -x

    gg_run_$ci | tee $OUT/$ci.log
    cur=$?
    echo "$cur" > $OUT/$ci.exit

    set +x
    set +o pipefail

    gg_sum_$ci

    ret=$((ret | cur))
}

## ci

# ctest_debug

function gg_run_ctest_debug {
    cd ${SRC}

    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Debug .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

    set +e
}

function gg_sum_ctest_debug {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in debug mode\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
    gg_printf '```\n'
    gg_printf '\n'
}

# ctest_release

function gg_run_ctest_release {
    cd ${SRC}

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    if [ -z $GG_BUILD_LOW_PERF ]; then
        (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    else
        (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    fi

    set +e
}

function gg_sum_ctest_release {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in release mode\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
    gg_printf '```\n'
}

# open_llama_3b_v2

function gg_run_open_llama_3b_v2 {
    cd ${SRC}

    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json

    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw

    path_models="../models-mnt/open-llama/3B-v2"
    path_wiki="../models-mnt/wikitext/wikitext-2-raw"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    python3 ../convert.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.bin"
    model_q8_0="${path_models}/ggml-model-q8_0.bin"
    model_q4_0="${path_models}/ggml-model-q4_0.bin"
    model_q4_1="${path_models}/ggml-model-q4_1.bin"
    model_q5_0="${path_models}/ggml-model-q5_0.bin"
    model_q5_1="${path_models}/ggml-model-q5_1.bin"
    model_q3_k="${path_models}/ggml-model-q3_k.bin"
    model_q4_k="${path_models}/ggml-model-q4_k.bin"
    model_q5_k="${path_models}/ggml-model-q5_k.bin"
    model_q6_k="${path_models}/ggml-model-q6_k.bin"

    wiki_test_60="${path_wiki}/wiki.test-60.raw"

    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
    ./bin/quantize ${model_f16} ${model_q6_k} q6_k

    (time ./bin/main --model ${model_f16} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
    (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
    (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
    (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
    (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
    (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
    (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
    (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
    (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
    (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log

    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
    (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
    (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
    (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
    (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
    (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
    (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
    (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log

    function check_ppl {
        qnt="$1"
        ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
            printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
            return 20
        fi

        printf ' - %s @ %s OK\n' "$qnt" "$ppl"
        return 0
    }

    check_ppl "f16"  "$(cat $OUT/${ci}-tg-f16.log  | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
    check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log

    set +e
}

function gg_sum_open_llama_3b_v2 {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'OpenLLaMA 3B-v2:\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
    gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
    gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
    gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
    gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
    gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
    gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
    gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
    gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
    gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
}

## main

if [ -z $GG_BUILD_LOW_PERF ]; then
    rm -rf ${SRC}/models-mnt

    mnt_models=$(realpath ${MNT}/models)
    mkdir -p ${mnt_models}
    ln -sfn ${mnt_models} ${SRC}/models-mnt

    python3 -m pip install -r ${SRC}/requirements.txt
fi

ret=0

#test $ret -eq 0 && gg_run ctest_debug
#test $ret -eq 0 && gg_run ctest_release

if [ -z $GG_BUILD_LOW_PERF ]; then
    test $ret -eq 0 && gg_run open_llama_3b_v2
fi

exit $ret
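
Because the script keys several stages off the `GG_BUILD_LOW_PERF` environment variable, a lighter local run can skip the model download and perplexity stage; a sketch, mirroring the invocation from ci/README.md:

```bash
# Sketch: GG_BUILD_LOW_PERF=1 skips the models-mnt setup, the pip install, and the open_llama_3b_v2 job.
mkdir -p tmp
GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

# Full run (downloads OpenLLaMA 3B-v2 into <mnt-dir>/models and symlinks it as models-mnt):
bash ./ci/run.sh ./tmp/results ./tmp/mnt
```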

convert-lora-to-ggml.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import json
 import os
 import re

convert.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import argparse
 import concurrent.futures
 import copy
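
With the new shebang and the 100644 → 100755 mode change, the converters can be invoked directly; a hedged example where the model directory is a placeholder (mirroring how ci/run.sh calls the script):

```bash
chmod +x convert.py   # only needed if the executable bit is lost locally
./convert.py models-mnt/open-llama/3B-v2/   # writes ggml-model-f16.bin next to the weights, as in ci/run.sh
```

The same applies to `convert-lora-to-ggml.py`.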

cudart64_110.dll

Binary file changed (−15.5 KB); contents not shown.

examples/baby-llama/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 set(TARGET baby-llama)
 add_executable(${TARGET} baby-llama.cpp)
+install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)

examples/benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 set(TARGET benchmark)
 add_executable(${TARGET} benchmark-matmult.cpp)
+install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
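
A sketch of how the new `install(TARGETS ... RUNTIME)` rules could be exercised; the install prefix is a placeholder, and `cmake --install` assumes CMake 3.15 or newer:

```bash
cmake -S . -B build -DCMAKE_INSTALL_PREFIX="$HOME/.local"
cmake --build build -j
cmake --install build   # copies runtime targets such as baby-llama and benchmark into <prefix>/bin
```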
